xref: /linux/lib/test_vmalloc.c (revision 5e60f363b38fd40e4d8838b5d6f4d4ecee92c777)
1  // SPDX-License-Identifier: GPL-2.0
2  
3  /*
4   * Test module for stress and analyze performance of vmalloc allocator.
5   * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
6   */
7  #include <linux/init.h>
8  #include <linux/kernel.h>
9  #include <linux/module.h>
10  #include <linux/vmalloc.h>
11  #include <linux/random.h>
12  #include <linux/kthread.h>
13  #include <linux/moduleparam.h>
14  #include <linux/completion.h>
15  #include <linux/delay.h>
16  #include <linux/rwsem.h>
17  #include <linux/mm.h>
18  #include <linux/rcupdate.h>
19  #include <linux/slab.h>
20  
21  #define __param(type, name, init, msg)		\
22  	static type name = init;				\
23  	module_param(name, type, 0444);			\
24  	MODULE_PARM_DESC(name, msg)				\
25  
26  __param(int, nr_threads, 0,
27  	"Number of workers to perform tests(min: 1 max: USHRT_MAX)");
28  
29  __param(bool, sequential_test_order, false,
30  	"Use sequential stress tests order");
31  
32  __param(int, test_repeat_count, 1,
33  	"Set test repeat counter");
34  
35  __param(int, test_loop_count, 1000000,
36  	"Set test loop counter");
37  
38  __param(int, run_test_mask, INT_MAX,
39  	"Set tests specified in the mask.\n\n"
40  		"\t\tid: 1,    name: fix_size_alloc_test\n"
41  		"\t\tid: 2,    name: full_fit_alloc_test\n"
42  		"\t\tid: 4,    name: long_busy_list_alloc_test\n"
43  		"\t\tid: 8,    name: random_size_alloc_test\n"
44  		"\t\tid: 16,   name: fix_align_alloc_test\n"
45  		"\t\tid: 32,   name: random_size_align_alloc_test\n"
46  		"\t\tid: 64,   name: align_shift_alloc_test\n"
47  		"\t\tid: 128,  name: pcpu_alloc_test\n"
48  		"\t\tid: 256,  name: kvfree_rcu_1_arg_vmalloc_test\n"
49  		"\t\tid: 512,  name: kvfree_rcu_2_arg_vmalloc_test\n"
50  		/* Add a new test case description here. */
51  );
52  
53  /*
54   * Read write semaphore for synchronization of setup
55   * phase that is done in main thread and workers.
56   */
57  static DECLARE_RWSEM(prepare_for_test_rwsem);
58  
59  /*
60   * Completion tracking for worker threads.
61   */
62  static DECLARE_COMPLETION(test_all_done_comp);
63  static atomic_t test_n_undone = ATOMIC_INIT(0);
64  
65  static inline void
66  test_report_one_done(void)
67  {
68  	if (atomic_dec_and_test(&test_n_undone))
69  		complete(&test_all_done_comp);
70  }
71  
72  static int random_size_align_alloc_test(void)
73  {
74  	unsigned long size, align, rnd;
75  	void *ptr;
76  	int i;
77  
78  	for (i = 0; i < test_loop_count; i++) {
79  		get_random_bytes(&rnd, sizeof(rnd));
80  
81  		/*
82  		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
83  		 */
84  		align = 1 << (rnd % 23);
85  
86  		/*
87  		 * Maximum 10 pages.
88  		 */
89  		size = ((rnd % 10) + 1) * PAGE_SIZE;
90  
91  		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
92  				__builtin_return_address(0));
93  		if (!ptr)
94  			return -1;
95  
96  		vfree(ptr);
97  	}
98  
99  	return 0;
100  }
101  
102  /*
103   * This test case is supposed to be failed.
104   */
105  static int align_shift_alloc_test(void)
106  {
107  	unsigned long align;
108  	void *ptr;
109  	int i;
110  
111  	for (i = 0; i < BITS_PER_LONG; i++) {
112  		align = ((unsigned long) 1) << i;
113  
114  		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
115  				__builtin_return_address(0));
116  		if (!ptr)
117  			return -1;
118  
119  		vfree(ptr);
120  	}
121  
122  	return 0;
123  }
124  
125  static int fix_align_alloc_test(void)
126  {
127  	void *ptr;
128  	int i;
129  
130  	for (i = 0; i < test_loop_count; i++) {
131  		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
132  				GFP_KERNEL | __GFP_ZERO, 0,
133  				__builtin_return_address(0));
134  		if (!ptr)
135  			return -1;
136  
137  		vfree(ptr);
138  	}
139  
140  	return 0;
141  }
142  
143  static int random_size_alloc_test(void)
144  {
145  	unsigned int n;
146  	void *p;
147  	int i;
148  
149  	for (i = 0; i < test_loop_count; i++) {
150  		get_random_bytes(&n, sizeof(i));
151  		n = (n % 100) + 1;
152  
153  		p = vmalloc(n * PAGE_SIZE);
154  
155  		if (!p)
156  			return -1;
157  
158  		*((__u8 *)p) = 1;
159  		vfree(p);
160  	}
161  
162  	return 0;
163  }
164  
165  static int long_busy_list_alloc_test(void)
166  {
167  	void *ptr_1, *ptr_2;
168  	void **ptr;
169  	int rv = -1;
170  	int i;
171  
172  	ptr = vmalloc(sizeof(void *) * 15000);
173  	if (!ptr)
174  		return rv;
175  
176  	for (i = 0; i < 15000; i++)
177  		ptr[i] = vmalloc(1 * PAGE_SIZE);
178  
179  	for (i = 0; i < test_loop_count; i++) {
180  		ptr_1 = vmalloc(100 * PAGE_SIZE);
181  		if (!ptr_1)
182  			goto leave;
183  
184  		ptr_2 = vmalloc(1 * PAGE_SIZE);
185  		if (!ptr_2) {
186  			vfree(ptr_1);
187  			goto leave;
188  		}
189  
190  		*((__u8 *)ptr_1) = 0;
191  		*((__u8 *)ptr_2) = 1;
192  
193  		vfree(ptr_1);
194  		vfree(ptr_2);
195  	}
196  
197  	/*  Success */
198  	rv = 0;
199  
200  leave:
201  	for (i = 0; i < 15000; i++)
202  		vfree(ptr[i]);
203  
204  	vfree(ptr);
205  	return rv;
206  }
207  
208  static int full_fit_alloc_test(void)
209  {
210  	void **ptr, **junk_ptr, *tmp;
211  	int junk_length;
212  	int rv = -1;
213  	int i;
214  
215  	junk_length = fls(num_online_cpus());
216  	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);
217  
218  	ptr = vmalloc(sizeof(void *) * junk_length);
219  	if (!ptr)
220  		return rv;
221  
222  	junk_ptr = vmalloc(sizeof(void *) * junk_length);
223  	if (!junk_ptr) {
224  		vfree(ptr);
225  		return rv;
226  	}
227  
228  	for (i = 0; i < junk_length; i++) {
229  		ptr[i] = vmalloc(1 * PAGE_SIZE);
230  		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
231  	}
232  
233  	for (i = 0; i < junk_length; i++)
234  		vfree(junk_ptr[i]);
235  
236  	for (i = 0; i < test_loop_count; i++) {
237  		tmp = vmalloc(1 * PAGE_SIZE);
238  
239  		if (!tmp)
240  			goto error;
241  
242  		*((__u8 *)tmp) = 1;
243  		vfree(tmp);
244  	}
245  
246  	/* Success */
247  	rv = 0;
248  
249  error:
250  	for (i = 0; i < junk_length; i++)
251  		vfree(ptr[i]);
252  
253  	vfree(ptr);
254  	vfree(junk_ptr);
255  
256  	return rv;
257  }
258  
259  static int fix_size_alloc_test(void)
260  {
261  	void *ptr;
262  	int i;
263  
264  	for (i = 0; i < test_loop_count; i++) {
265  		ptr = vmalloc(3 * PAGE_SIZE);
266  
267  		if (!ptr)
268  			return -1;
269  
270  		*((__u8 *)ptr) = 0;
271  
272  		vfree(ptr);
273  	}
274  
275  	return 0;
276  }
277  
278  static int
279  pcpu_alloc_test(void)
280  {
281  	int rv = 0;
282  #ifndef CONFIG_NEED_PER_CPU_KM
283  	void __percpu **pcpu;
284  	size_t size, align;
285  	int i;
286  
287  	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
288  	if (!pcpu)
289  		return -1;
290  
291  	for (i = 0; i < 35000; i++) {
292  		unsigned int r;
293  
294  		get_random_bytes(&r, sizeof(i));
295  		size = (r % (PAGE_SIZE / 4)) + 1;
296  
297  		/*
298  		 * Maximum PAGE_SIZE
299  		 */
300  		get_random_bytes(&r, sizeof(i));
301  		align = 1 << ((i % 11) + 1);
302  
303  		pcpu[i] = __alloc_percpu(size, align);
304  		if (!pcpu[i])
305  			rv = -1;
306  	}
307  
308  	for (i = 0; i < 35000; i++)
309  		free_percpu(pcpu[i]);
310  
311  	vfree(pcpu);
312  #endif
313  	return rv;
314  }
315  
316  struct test_kvfree_rcu {
317  	struct rcu_head rcu;
318  	unsigned char array[20];
319  };
320  
321  static int
322  kvfree_rcu_1_arg_vmalloc_test(void)
323  {
324  	struct test_kvfree_rcu *p;
325  	int i;
326  
327  	for (i = 0; i < test_loop_count; i++) {
328  		p = vmalloc(1 * PAGE_SIZE);
329  		if (!p)
330  			return -1;
331  
332  		p->array[0] = 'a';
333  		kvfree_rcu(p);
334  	}
335  
336  	return 0;
337  }
338  
339  static int
340  kvfree_rcu_2_arg_vmalloc_test(void)
341  {
342  	struct test_kvfree_rcu *p;
343  	int i;
344  
345  	for (i = 0; i < test_loop_count; i++) {
346  		p = vmalloc(1 * PAGE_SIZE);
347  		if (!p)
348  			return -1;
349  
350  		p->array[0] = 'a';
351  		kvfree_rcu(p, rcu);
352  	}
353  
354  	return 0;
355  }
356  
357  struct test_case_desc {
358  	const char *test_name;
359  	int (*test_func)(void);
360  };
361  
362  static struct test_case_desc test_case_array[] = {
363  	{ "fix_size_alloc_test", fix_size_alloc_test },
364  	{ "full_fit_alloc_test", full_fit_alloc_test },
365  	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
366  	{ "random_size_alloc_test", random_size_alloc_test },
367  	{ "fix_align_alloc_test", fix_align_alloc_test },
368  	{ "random_size_align_alloc_test", random_size_align_alloc_test },
369  	{ "align_shift_alloc_test", align_shift_alloc_test },
370  	{ "pcpu_alloc_test", pcpu_alloc_test },
371  	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
372  	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
373  	/* Add a new test case here. */
374  };
375  
376  struct test_case_data {
377  	int test_failed;
378  	int test_passed;
379  	u64 time;
380  };
381  
382  static struct test_driver {
383  	struct task_struct *task;
384  	struct test_case_data data[ARRAY_SIZE(test_case_array)];
385  
386  	unsigned long start;
387  	unsigned long stop;
388  } *tdriver;
389  
/*
 * Shuffle the @n elements of @arr in place with a Fisher-Yates shuffle.
 *
 * For each position i, walking down from the end, the partner index must
 * be drawn from the inclusive range [0, i]. Drawing it from [0, i - 1]
 * would never leave an element in place and thus could only ever produce
 * cyclic permutations (for n == 2 the two elements would always be
 * swapped).
 *
 * Arrays with fewer than two elements are left untouched.
 */
static void shuffle_array(int *arr, int n)
{
	unsigned int rnd;
	int i, j, x;

	for (i = n - 1; i > 0; i--)  {
		get_random_bytes(&rnd, sizeof(rnd));

		/* Cut the range to [0, i]; j == i keeps arr[i] where it is. */
		j = rnd % (i + 1);

		/* Swap indexes. */
		x = arr[i];
		arr[i] = arr[j];
		arr[j] = x;
	}
}
407  
408  static int test_func(void *private)
409  {
410  	struct test_driver *t = private;
411  	int random_array[ARRAY_SIZE(test_case_array)];
412  	int index, i, j;
413  	ktime_t kt;
414  	u64 delta;
415  
416  	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
417  		random_array[i] = i;
418  
419  	if (!sequential_test_order)
420  		shuffle_array(random_array, ARRAY_SIZE(test_case_array));
421  
422  	/*
423  	 * Block until initialization is done.
424  	 */
425  	down_read(&prepare_for_test_rwsem);
426  
427  	t->start = get_cycles();
428  	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
429  		index = random_array[i];
430  
431  		/*
432  		 * Skip tests if run_test_mask has been specified.
433  		 */
434  		if (!((run_test_mask & (1 << index)) >> index))
435  			continue;
436  
437  		kt = ktime_get();
438  		for (j = 0; j < test_repeat_count; j++) {
439  			if (!test_case_array[index].test_func())
440  				t->data[index].test_passed++;
441  			else
442  				t->data[index].test_failed++;
443  		}
444  
445  		/*
446  		 * Take an average time that test took.
447  		 */
448  		delta = (u64) ktime_us_delta(ktime_get(), kt);
449  		do_div(delta, (u32) test_repeat_count);
450  
451  		t->data[index].time = delta;
452  	}
453  	t->stop = get_cycles();
454  
455  	up_read(&prepare_for_test_rwsem);
456  	test_report_one_done();
457  
458  	/*
459  	 * Wait for the kthread_stop() call.
460  	 */
461  	while (!kthread_should_stop())
462  		msleep(10);
463  
464  	return 0;
465  }
466  
467  static int
468  init_test_configurtion(void)
469  {
470  	/*
471  	 * A maximum number of workers is defined as hard-coded
472  	 * value and set to USHRT_MAX. We add such gap just in
473  	 * case and for potential heavy stressing.
474  	 */
475  	nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);
476  
477  	/* Allocate the space for test instances. */
478  	tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
479  	if (tdriver == NULL)
480  		return -1;
481  
482  	if (test_repeat_count <= 0)
483  		test_repeat_count = 1;
484  
485  	if (test_loop_count <= 0)
486  		test_loop_count = 1;
487  
488  	return 0;
489  }
490  
491  static void do_concurrent_test(void)
492  {
493  	int i, ret;
494  
495  	/*
496  	 * Set some basic configurations plus sanity check.
497  	 */
498  	ret = init_test_configurtion();
499  	if (ret < 0)
500  		return;
501  
502  	/*
503  	 * Put on hold all workers.
504  	 */
505  	down_write(&prepare_for_test_rwsem);
506  
507  	for (i = 0; i < nr_threads; i++) {
508  		struct test_driver *t = &tdriver[i];
509  
510  		t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);
511  
512  		if (!IS_ERR(t->task))
513  			/* Success. */
514  			atomic_inc(&test_n_undone);
515  		else
516  			pr_err("Failed to start %d kthread\n", i);
517  	}
518  
519  	/*
520  	 * Now let the workers do their job.
521  	 */
522  	up_write(&prepare_for_test_rwsem);
523  
524  	/*
525  	 * Sleep quiet until all workers are done with 1 second
526  	 * interval. Since the test can take a lot of time we
527  	 * can run into a stack trace of the hung task. That is
528  	 * why we go with completion_timeout and HZ value.
529  	 */
530  	do {
531  		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
532  	} while (!ret);
533  
534  	for (i = 0; i < nr_threads; i++) {
535  		struct test_driver *t = &tdriver[i];
536  		int j;
537  
538  		if (!IS_ERR(t->task))
539  			kthread_stop(t->task);
540  
541  		for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
542  			if (!((run_test_mask & (1 << j)) >> j))
543  				continue;
544  
545  			pr_info(
546  				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
547  				test_case_array[j].test_name,
548  				t->data[j].test_passed,
549  				t->data[j].test_failed,
550  				test_repeat_count, test_loop_count,
551  				t->data[j].time);
552  		}
553  
554  		pr_info("All test took worker%d=%lu cycles\n",
555  			i, t->stop - t->start);
556  	}
557  
558  	kvfree(tdriver);
559  }
560  
561  static int vmalloc_test_init(void)
562  {
563  	do_concurrent_test();
564  	return -EAGAIN; /* Fail will directly unload the module */
565  }
566  
/*
 * Module exit hook. Intentionally empty: do_concurrent_test() stops its
 * workers and frees its state before vmalloc_test_init() returns.
 */
static void vmalloc_test_exit(void)
{
}
570  
571  module_init(vmalloc_test_init)
572  module_exit(vmalloc_test_exit)
573  
574  MODULE_LICENSE("GPL");
575  MODULE_AUTHOR("Uladzislau Rezki");
576  MODULE_DESCRIPTION("vmalloc test module");
577