xref: /linux/lib/test_objpool.c (revision 4b660dbd9ee2059850fd30e0df420ca7a38a1856)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Test module for lockless object pool
5  *
6  * Copyright: wuqiang.matt@bytedance.com
7  */
8 
9 #include <linux/errno.h>
10 #include <linux/module.h>
11 #include <linux/moduleparam.h>
12 #include <linux/completion.h>
13 #include <linux/kthread.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/delay.h>
17 #include <linux/hrtimer.h>
18 #include <linux/objpool.h>
19 
/*
 * Capacity of the on-stack object array used by the bulk workers; the
 * bulk_normal / bulk_irq values of a testcase must not exceed it.
 */
#define OT_NR_MAX_BULK 16
21 
22 /* memory usage */
23 struct ot_mem_stat {
24 	atomic_long_t alloc;
25 	atomic_long_t free;
26 };
27 
/* object allocation results, counted per objpool_pop() attempt */
struct ot_obj_stat {
	unsigned long nhits;	/* pops that returned an object */
	unsigned long nmiss;	/* pops that returned NULL */
};
33 
34 /* control & results per testcase */
35 struct ot_data {
36 	struct rw_semaphore start;
37 	struct completion wait;
38 	struct completion rcu;
39 	atomic_t nthreads ____cacheline_aligned_in_smp;
40 	atomic_t stop ____cacheline_aligned_in_smp;
41 	struct ot_mem_stat kmalloc;
42 	struct ot_mem_stat vmalloc;
43 	struct ot_obj_stat objects;
44 	u64    duration;
45 };
46 
47 /* testcase */
48 struct ot_test {
49 	int async; /* synchronous or asynchronous */
50 	int mode; /* only mode 0 supported */
51 	int objsz; /* object size */
52 	int duration; /* ms */
53 	int delay; /* ms */
54 	int bulk_normal;
55 	int bulk_irq;
56 	unsigned long hrtimer; /* ms */
57 	const char *name;
58 	struct ot_data data;
59 };
60 
61 /* per-cpu worker */
62 struct ot_item {
63 	struct objpool_head *pool; /* pool head */
64 	struct ot_test *test; /* test parameters */
65 
66 	void (*worker)(struct ot_item *item, int irq);
67 
68 	/* hrtimer control */
69 	ktime_t hrtcycle;
70 	struct hrtimer hrtimer;
71 
72 	int bulk[2]; /* for thread and irq */
73 	int delay;
74 	u32 niters;
75 
76 	/* summary per thread */
77 	struct ot_obj_stat stat[2]; /* thread and irq */
78 	u64 duration;
79 };
80 
81 /*
82  * memory leakage checking
83  */
84 
85 static void *ot_kzalloc(struct ot_test *test, long size)
86 {
87 	void *ptr = kzalloc(size, GFP_KERNEL);
88 
89 	if (ptr)
90 		atomic_long_add(size, &test->data.kmalloc.alloc);
91 	return ptr;
92 }
93 
94 static void ot_kfree(struct ot_test *test, void *ptr, long size)
95 {
96 	if (!ptr)
97 		return;
98 	atomic_long_add(size, &test->data.kmalloc.free);
99 	kfree(ptr);
100 }
101 
102 static void ot_mem_report(struct ot_test *test)
103 {
104 	long alloc, free;
105 
106 	pr_info("memory allocation summary for %s\n", test->name);
107 
108 	alloc = atomic_long_read(&test->data.kmalloc.alloc);
109 	free = atomic_long_read(&test->data.kmalloc.free);
110 	pr_info("  kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
111 
112 	alloc = atomic_long_read(&test->data.vmalloc.alloc);
113 	free = atomic_long_read(&test->data.vmalloc.free);
114 	pr_info("  vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
115 }
116 
/* user object instance: the element type stored in the tested objpool */
struct ot_node {
	void *owner;			/* set by ot_init_node() to the owning pool */
	unsigned long data;
	unsigned long refs;		/* bumped each time the node is recycled */
	unsigned long payload[32];
};
124 
125 /* user objpool manager */
126 struct ot_context {
127 	struct objpool_head pool; /* objpool head */
128 	struct ot_test *test; /* test parameters */
129 	void *ptr; /* user pool buffer */
130 	unsigned long size; /* buffer size */
131 	struct rcu_head rcu;
132 };
133 
134 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
135 
136 static int ot_init_data(struct ot_data *data)
137 {
138 	memset(data, 0, sizeof(*data));
139 	init_rwsem(&data->start);
140 	init_completion(&data->wait);
141 	init_completion(&data->rcu);
142 	atomic_set(&data->nthreads, 1);
143 
144 	return 0;
145 }
146 
147 static int ot_init_node(void *nod, void *context)
148 {
149 	struct ot_context *sop = context;
150 	struct ot_node *on = nod;
151 
152 	on->owner = &sop->pool;
153 	return 0;
154 }
155 
156 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
157 {
158 	struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
159 	struct ot_test *test = item->test;
160 
161 	if (atomic_read_acquire(&test->data.stop))
162 		return HRTIMER_NORESTART;
163 
164 	/* do bulk-testings for objects pop/push */
165 	item->worker(item, 1);
166 
167 	hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
168 	return HRTIMER_RESTART;
169 }
170 
171 static void ot_start_hrtimer(struct ot_item *item)
172 {
173 	if (!item->test->hrtimer)
174 		return;
175 	hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
176 }
177 
178 static void ot_stop_hrtimer(struct ot_item *item)
179 {
180 	if (!item->test->hrtimer)
181 		return;
182 	hrtimer_cancel(&item->hrtimer);
183 }
184 
185 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
186 {
187 	struct hrtimer *hrt = &item->hrtimer;
188 
189 	if (!hrtimer)
190 		return -ENOENT;
191 
192 	item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
193 	hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
194 	hrt->function = ot_hrtimer_handler;
195 	return 0;
196 }
197 
198 static int ot_init_cpu_item(struct ot_item *item,
199 			struct ot_test *test,
200 			struct objpool_head *pool,
201 			void (*worker)(struct ot_item *, int))
202 {
203 	memset(item, 0, sizeof(*item));
204 	item->pool = pool;
205 	item->test = test;
206 	item->worker = worker;
207 
208 	item->bulk[0] = test->bulk_normal;
209 	item->bulk[1] = test->bulk_irq;
210 	item->delay = test->delay;
211 
212 	/* initialize hrtimer */
213 	ot_init_hrtimer(item, item->test->hrtimer);
214 	return 0;
215 }
216 
217 static int ot_thread_worker(void *arg)
218 {
219 	struct ot_item *item = arg;
220 	struct ot_test *test = item->test;
221 	ktime_t start;
222 
223 	atomic_inc(&test->data.nthreads);
224 	down_read(&test->data.start);
225 	up_read(&test->data.start);
226 	start = ktime_get();
227 	ot_start_hrtimer(item);
228 	do {
229 		if (atomic_read_acquire(&test->data.stop))
230 			break;
231 		/* do bulk-testings for objects pop/push */
232 		item->worker(item, 0);
233 	} while (!kthread_should_stop());
234 	ot_stop_hrtimer(item);
235 	item->duration = (u64) ktime_us_delta(ktime_get(), start);
236 	if (atomic_dec_and_test(&test->data.nthreads))
237 		complete(&test->data.wait);
238 
239 	return 0;
240 }
241 
242 static void ot_perf_report(struct ot_test *test, u64 duration)
243 {
244 	struct ot_obj_stat total, normal = {0}, irq = {0};
245 	int cpu, nthreads = 0;
246 
247 	pr_info("\n");
248 	pr_info("Testing summary for %s\n", test->name);
249 
250 	for_each_possible_cpu(cpu) {
251 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
252 		if (!item->duration)
253 			continue;
254 		normal.nhits += item->stat[0].nhits;
255 		normal.nmiss += item->stat[0].nmiss;
256 		irq.nhits += item->stat[1].nhits;
257 		irq.nmiss += item->stat[1].nmiss;
258 		pr_info("CPU: %d  duration: %lluus\n", cpu, item->duration);
259 		pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
260 			item->stat[0].nhits, item->stat[0].nmiss);
261 		pr_info("\tirq:   \t%16lu hits \t%16lu miss\n",
262 			item->stat[1].nhits, item->stat[1].nmiss);
263 		pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
264 			item->stat[0].nhits + item->stat[1].nhits,
265 			item->stat[0].nmiss + item->stat[1].nmiss);
266 		nthreads++;
267 	}
268 
269 	total.nhits = normal.nhits + irq.nhits;
270 	total.nmiss = normal.nmiss + irq.nmiss;
271 
272 	pr_info("ALL: \tnthreads: %d  duration: %lluus\n", nthreads, duration);
273 	pr_info("SUM: \t%16lu hits \t%16lu miss\n",
274 		total.nhits, total.nmiss);
275 
276 	test->data.objects = total;
277 	test->data.duration = duration;
278 }
279 
280 /*
281  * synchronous test cases for objpool manipulation
282  */
283 
284 /* objpool manipulation for synchronous mode (percpu objpool) */
285 static struct ot_context *ot_init_sync_m0(struct ot_test *test)
286 {
287 	struct ot_context *sop = NULL;
288 	int max = num_possible_cpus() << 3;
289 	gfp_t gfp = GFP_KERNEL;
290 
291 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
292 	if (!sop)
293 		return NULL;
294 	sop->test = test;
295 	if (test->objsz < 512)
296 		gfp = GFP_ATOMIC;
297 
298 	if (objpool_init(&sop->pool, max, test->objsz,
299 			 gfp, sop, ot_init_node, NULL)) {
300 		ot_kfree(test, sop, sizeof(*sop));
301 		return NULL;
302 	}
303 	WARN_ON(max != sop->pool.nr_objs);
304 
305 	return sop;
306 }
307 
308 static void ot_fini_sync(struct ot_context *sop)
309 {
310 	objpool_fini(&sop->pool);
311 	ot_kfree(sop->test, sop, sizeof(*sop));
312 }
313 
314 static struct {
315 	struct ot_context * (*init)(struct ot_test *oc);
316 	void (*fini)(struct ot_context *sop);
317 } g_ot_sync_ops[] = {
318 	{.init = ot_init_sync_m0, .fini = ot_fini_sync},
319 };
320 
321 /*
322  * synchronous test cases: performance mode
323  */
324 
325 static void ot_bulk_sync(struct ot_item *item, int irq)
326 {
327 	struct ot_node *nods[OT_NR_MAX_BULK];
328 	int i;
329 
330 	for (i = 0; i < item->bulk[irq]; i++)
331 		nods[i] = objpool_pop(item->pool);
332 
333 	if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
334 		msleep(item->delay);
335 
336 	while (i-- > 0) {
337 		struct ot_node *on = nods[i];
338 		if (on) {
339 			on->refs++;
340 			objpool_push(on, item->pool);
341 			item->stat[irq].nhits++;
342 		} else {
343 			item->stat[irq].nmiss++;
344 		}
345 	}
346 }
347 
348 static int ot_start_sync(struct ot_test *test)
349 {
350 	struct ot_context *sop;
351 	ktime_t start;
352 	u64 duration;
353 	unsigned long timeout;
354 	int cpu;
355 
356 	/* initialize objpool for syncrhonous testcase */
357 	sop = g_ot_sync_ops[test->mode].init(test);
358 	if (!sop)
359 		return -ENOMEM;
360 
361 	/* grab rwsem to block testing threads */
362 	down_write(&test->data.start);
363 
364 	for_each_possible_cpu(cpu) {
365 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
366 		struct task_struct *work;
367 
368 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);
369 
370 		/* skip offline cpus */
371 		if (!cpu_online(cpu))
372 			continue;
373 
374 		work = kthread_create_on_node(ot_thread_worker, item,
375 				cpu_to_node(cpu), "ot_worker_%d", cpu);
376 		if (IS_ERR(work)) {
377 			pr_err("failed to create thread for cpu %d\n", cpu);
378 		} else {
379 			kthread_bind(work, cpu);
380 			wake_up_process(work);
381 		}
382 	}
383 
384 	/* wait a while to make sure all threads waiting at start line */
385 	msleep(20);
386 
387 	/* in case no threads were created: memory insufficient ? */
388 	if (atomic_dec_and_test(&test->data.nthreads))
389 		complete(&test->data.wait);
390 
391 	// sched_set_fifo_low(current);
392 
393 	/* start objpool testing threads */
394 	start = ktime_get();
395 	up_write(&test->data.start);
396 
397 	/* yeild cpu to worker threads for duration ms */
398 	timeout = msecs_to_jiffies(test->duration);
399 	schedule_timeout_interruptible(timeout);
400 
401 	/* tell workers threads to quit */
402 	atomic_set_release(&test->data.stop, 1);
403 
404 	/* wait all workers threads finish and quit */
405 	wait_for_completion(&test->data.wait);
406 	duration = (u64) ktime_us_delta(ktime_get(), start);
407 
408 	/* cleanup objpool */
409 	g_ot_sync_ops[test->mode].fini(sop);
410 
411 	/* report testing summary and performance results */
412 	ot_perf_report(test, duration);
413 
414 	/* report memory allocation summary */
415 	ot_mem_report(test);
416 
417 	return 0;
418 }
419 
420 /*
421  * asynchronous test cases: pool lifecycle controlled by refcount
422  */
423 
424 static void ot_fini_async_rcu(struct rcu_head *rcu)
425 {
426 	struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
427 	struct ot_test *test = sop->test;
428 
429 	/* here all cpus are aware of the stop event: test->data.stop = 1 */
430 	WARN_ON(!atomic_read_acquire(&test->data.stop));
431 
432 	objpool_fini(&sop->pool);
433 	complete(&test->data.rcu);
434 }
435 
436 static void ot_fini_async(struct ot_context *sop)
437 {
438 	/* make sure the stop event is acknowledged by all cores */
439 	call_rcu(&sop->rcu, ot_fini_async_rcu);
440 }
441 
442 static int ot_objpool_release(struct objpool_head *head, void *context)
443 {
444 	struct ot_context *sop = context;
445 
446 	WARN_ON(!head || !sop || head != &sop->pool);
447 
448 	/* do context cleaning if needed */
449 	if (sop)
450 		ot_kfree(sop->test, sop, sizeof(*sop));
451 
452 	return 0;
453 }
454 
455 static struct ot_context *ot_init_async_m0(struct ot_test *test)
456 {
457 	struct ot_context *sop = NULL;
458 	int max = num_possible_cpus() << 3;
459 	gfp_t gfp = GFP_KERNEL;
460 
461 	sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
462 	if (!sop)
463 		return NULL;
464 	sop->test = test;
465 	if (test->objsz < 512)
466 		gfp = GFP_ATOMIC;
467 
468 	if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
469 			 ot_init_node, ot_objpool_release)) {
470 		ot_kfree(test, sop, sizeof(*sop));
471 		return NULL;
472 	}
473 	WARN_ON(max != sop->pool.nr_objs);
474 
475 	return sop;
476 }
477 
478 static struct {
479 	struct ot_context * (*init)(struct ot_test *oc);
480 	void (*fini)(struct ot_context *sop);
481 } g_ot_async_ops[] = {
482 	{.init = ot_init_async_m0, .fini = ot_fini_async},
483 };
484 
485 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
486 			int release)
487 {
488 	struct ot_context *sop;
489 
490 	on->refs++;
491 
492 	if (!release) {
493 		/* push object back to opjpool for reuse */
494 		objpool_push(on, pool);
495 		return;
496 	}
497 
498 	sop = container_of(pool, struct ot_context, pool);
499 	WARN_ON(sop != pool->context);
500 
501 	/* unref objpool with nod removed forever */
502 	objpool_drop(on, pool);
503 }
504 
505 static void ot_bulk_async(struct ot_item *item, int irq)
506 {
507 	struct ot_test *test = item->test;
508 	struct ot_node *nods[OT_NR_MAX_BULK];
509 	int i, stop;
510 
511 	for (i = 0; i < item->bulk[irq]; i++)
512 		nods[i] = objpool_pop(item->pool);
513 
514 	if (!irq) {
515 		if (item->delay || !(++(item->niters) & 0x7FFF))
516 			msleep(item->delay);
517 		get_cpu();
518 	}
519 
520 	stop = atomic_read_acquire(&test->data.stop);
521 
522 	/* drop all objects and deref objpool */
523 	while (i-- > 0) {
524 		struct ot_node *on = nods[i];
525 
526 		if (on) {
527 			on->refs++;
528 			ot_nod_recycle(on, item->pool, stop);
529 			item->stat[irq].nhits++;
530 		} else {
531 			item->stat[irq].nmiss++;
532 		}
533 	}
534 
535 	if (!irq)
536 		put_cpu();
537 }
538 
539 static int ot_start_async(struct ot_test *test)
540 {
541 	struct ot_context *sop;
542 	ktime_t start;
543 	u64 duration;
544 	unsigned long timeout;
545 	int cpu;
546 
547 	/* initialize objpool for syncrhonous testcase */
548 	sop = g_ot_async_ops[test->mode].init(test);
549 	if (!sop)
550 		return -ENOMEM;
551 
552 	/* grab rwsem to block testing threads */
553 	down_write(&test->data.start);
554 
555 	for_each_possible_cpu(cpu) {
556 		struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
557 		struct task_struct *work;
558 
559 		ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);
560 
561 		/* skip offline cpus */
562 		if (!cpu_online(cpu))
563 			continue;
564 
565 		work = kthread_create_on_node(ot_thread_worker, item,
566 				cpu_to_node(cpu), "ot_worker_%d", cpu);
567 		if (IS_ERR(work)) {
568 			pr_err("failed to create thread for cpu %d\n", cpu);
569 		} else {
570 			kthread_bind(work, cpu);
571 			wake_up_process(work);
572 		}
573 	}
574 
575 	/* wait a while to make sure all threads waiting at start line */
576 	msleep(20);
577 
578 	/* in case no threads were created: memory insufficient ? */
579 	if (atomic_dec_and_test(&test->data.nthreads))
580 		complete(&test->data.wait);
581 
582 	/* start objpool testing threads */
583 	start = ktime_get();
584 	up_write(&test->data.start);
585 
586 	/* yeild cpu to worker threads for duration ms */
587 	timeout = msecs_to_jiffies(test->duration);
588 	schedule_timeout_interruptible(timeout);
589 
590 	/* tell workers threads to quit */
591 	atomic_set_release(&test->data.stop, 1);
592 
593 	/* do async-finalization */
594 	g_ot_async_ops[test->mode].fini(sop);
595 
596 	/* wait all workers threads finish and quit */
597 	wait_for_completion(&test->data.wait);
598 	duration = (u64) ktime_us_delta(ktime_get(), start);
599 
600 	/* assure rcu callback is triggered */
601 	wait_for_completion(&test->data.rcu);
602 
603 	/*
604 	 * now we are sure that objpool is finalized either
605 	 * by rcu callback or by worker threads
606 	 */
607 
608 	/* report testing summary and performance results */
609 	ot_perf_report(test, duration);
610 
611 	/* report memory allocation summary */
612 	ot_mem_report(test);
613 
614 	return 0;
615 }
616 
/*
 * predefined testing cases:
 *   synchronous case / overrun case / async case
 *
 * async: synchronous or asynchronous testing
 * mode: only mode 0 supported
 * objsz: object size
 * duration: int, total test time in ms
 * delay: int, delay (in ms) between each iteration
 * bulk_normal: int, repeat times for thread worker
 * bulk_irq: int, repeat times for irq consumer
 * hrtimer: unsigned long, hrtimer interval in ms
 * name: char *, tag for current test ot_item
 */
631 
632 #define NODE_COMPACT sizeof(struct ot_node)
633 #define NODE_VMALLOC (512)
634 
635 static struct ot_test g_testcases[] = {
636 
637 	/* sync & normal */
638 	{0, 0, NODE_COMPACT, 1000, 0,  1,  0,  0, "sync: percpu objpool"},
639 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  0,  0, "sync: percpu objpool from vmalloc"},
640 
641 	/* sync & hrtimer */
642 	{0, 0, NODE_COMPACT, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool"},
643 	{0, 0, NODE_VMALLOC, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool from vmalloc"},
644 
645 	/* sync & overrun */
646 	{0, 0, NODE_COMPACT, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool"},
647 	{0, 0, NODE_VMALLOC, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool from vmalloc"},
648 
649 	/* async mode */
650 	{1, 0, NODE_COMPACT, 1000, 100,  1,  0,  0, "async: percpu objpool"},
651 	{1, 0, NODE_VMALLOC, 1000, 100,  1,  0,  0, "async: percpu objpool from vmalloc"},
652 
653 	/* async + hrtimer mode */
654 	{1, 0, NODE_COMPACT, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool"},
655 	{1, 0, NODE_VMALLOC, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool from vmalloc"},
656 };
657 
658 static int __init ot_mod_init(void)
659 {
660 	int i;
661 
662 	/* perform testings */
663 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
664 		ot_init_data(&g_testcases[i].data);
665 		if (g_testcases[i].async)
666 			ot_start_async(&g_testcases[i]);
667 		else
668 			ot_start_sync(&g_testcases[i]);
669 	}
670 
671 	/* show tests summary */
672 	pr_info("\n");
673 	pr_info("Summary of testcases:\n");
674 	for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
675 		pr_info("    duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
676 			g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
677 			g_testcases[i].data.objects.nmiss, g_testcases[i].name);
678 	}
679 
680 	return -EAGAIN;
681 }
682 
683 static void __exit ot_mod_exit(void)
684 {
685 }
686 
687 module_init(ot_mod_init);
688 module_exit(ot_mod_exit);
689 
690 MODULE_LICENSE("GPL");