xref: /linux/lib/raid/xor/xor-core.c (revision e20043b4765cdf7ec8e963d706bb91469cba8cb8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 1996, 1997, 1998, 1999, 2000,
4  * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
5  *
6  * Dispatch optimized XOR parity functions.
7  */
8 
9 #include <linux/module.h>
10 #include <linux/gfp.h>
11 #include <linux/raid/xor.h>
12 #include <linux/jiffies.h>
13 #include <linux/preempt.h>
14 #include "xor_impl.h"
15 
16 /* The xor routines to use.  */
17 static struct xor_block_template *active_template;
18 
19 void
20 xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
21 {
22 	unsigned long *p1, *p2, *p3, *p4;
23 
24 	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
25 
26 	p1 = (unsigned long *) srcs[0];
27 	if (src_count == 1) {
28 		active_template->do_2(bytes, dest, p1);
29 		return;
30 	}
31 
32 	p2 = (unsigned long *) srcs[1];
33 	if (src_count == 2) {
34 		active_template->do_3(bytes, dest, p1, p2);
35 		return;
36 	}
37 
38 	p3 = (unsigned long *) srcs[2];
39 	if (src_count == 3) {
40 		active_template->do_4(bytes, dest, p1, p2, p3);
41 		return;
42 	}
43 
44 	p4 = (unsigned long *) srcs[3];
45 	active_template->do_5(bytes, dest, p1, p2, p3, p4);
46 }
47 EXPORT_SYMBOL(xor_blocks);
48 
49 /* Set of all registered templates.  */
50 static struct xor_block_template *__initdata template_list;
51 static bool __initdata xor_forced = false;
52 
53 /**
54  * xor_register - register a XOR template
55  * @tmpl:	template to register
56  *
57  * Register a XOR implementation with the core.  Registered implementations
58  * will be measured by a trivial benchmark, and the fastest one is chosen
59  * unless an implementation is forced using xor_force().
60  */
61 void __init xor_register(struct xor_block_template *tmpl)
62 {
63 	tmpl->next = template_list;
64 	template_list = tmpl;
65 }
66 
/**
 * xor_force - force use of a XOR template
 * @tmpl:	template to force
 *
 * Register a XOR implementation with the core and force using it.  Forcing
 * an implementation will make the core ignore any template registered using
 * xor_register(), or any previous implementation forced using xor_force().
 *
 * NOTE(review): this only sets active_template; the xor_forced flag that
 * suppresses calibration is latched later by xor_init() when it finds
 * active_template already set after arch_xor_init() — so calling this
 * outside of arch_xor_init() would not skip calibration.  Confirm callers.
 */
void __init xor_force(struct xor_block_template *tmpl)
{
	active_template = tmpl;
}
79 
80 #define BENCH_SIZE	4096
81 #define REPS		800U
82 
/*
 * Benchmark a single template's do_2() routine over a BENCH_SIZE buffer
 * and record the measured throughput (in MB/s) in tmpl->speed.
 */
static void __init
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
	int speed;
	unsigned long reps;
	ktime_t min, start, t0;

	/* Keep the measurement from being interleaved with other tasks. */
	preempt_disable();

	reps = 0;
	t0 = ktime_get();
	/* delay start until time has advanced */
	while ((start = ktime_get()) == t0)
		cpu_relax();
	do {
		mb(); /* prevent loop optimization */
		tmpl->do_2(BENCH_SIZE, b1, b2);
		mb();
	/*
	 * Run at least REPS iterations, then keep going until the clock has
	 * visibly advanced past "start"; this guarantees a non-zero elapsed
	 * time below.  reps counts the actual number of iterations executed.
	 */
	} while (reps++ < REPS || (t0 = ktime_get()) == start);
	/*
	 * Despite its name, "min" is simply the elapsed time of this single
	 * run — there is only one trial per template.
	 */
	min = ktime_sub(t0, start);

	preempt_enable();

	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
	speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
	tmpl->speed = speed;

	pr_info("   %-16s: %5d MB/sec\n", tmpl->name, speed);
}
112 
113 static int __init calibrate_xor_blocks(void)
114 {
115 	void *b1, *b2;
116 	struct xor_block_template *f, *fastest;
117 
118 	if (xor_forced)
119 		return 0;
120 
121 	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
122 	if (!b1) {
123 		pr_warn("xor: Yikes!  No memory available.\n");
124 		return -ENOMEM;
125 	}
126 	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
127 
128 	pr_info("xor: measuring software checksum speed\n");
129 	fastest = template_list;
130 	for (f = template_list; f; f = f->next) {
131 		do_xor_speed(f, b1, b2);
132 		if (f->speed > fastest->speed)
133 			fastest = f;
134 	}
135 	active_template = fastest;
136 	pr_info("xor: using function: %s (%d MB/sec)\n",
137 	       fastest->name, fastest->speed);
138 
139 	free_pages((unsigned long)b1, 2);
140 	return 0;
141 }
142 
#ifdef CONFIG_XOR_BLOCKS_ARCH
#include "xor_arch.h" /* $SRCARCH/xor_arch.h */
#else
/*
 * Generic fallback when no architecture-specific selection exists:
 * register the portable templates (presumably declared in xor_impl.h —
 * TODO confirm) and let calibration pick the fastest.  The registration
 * order matters for tie-breaking: the first-registered template wins on
 * equal speed, and the last-registered one is the pre-calibration default.
 */
static void __init arch_xor_init(void)
{
	xor_register(&xor_block_8regs);
	xor_register(&xor_block_8regs_p);
	xor_register(&xor_block_32regs);
	xor_register(&xor_block_32regs_p);
}
#endif /* CONFIG_XOR_BLOCKS_ARCH */
154 
/*
 * Early (core_initcall) setup: let the architecture register or force its
 * templates, then either lock in a forced choice, calibrate immediately
 * (module build), or install a temporary default until the deferred
 * calibration initcall runs (built-in build).
 */
static int __init xor_init(void)
{
	arch_xor_init();

	/*
	 * If this arch/cpu has a short-circuited selection, don't loop through
	 * all the possible functions, just use the best one.
	 */
	if (active_template) {
		pr_info("xor: automatically using best checksumming function   %-10s\n",
			active_template->name);
		/* Latch the choice so calibrate_xor_blocks() is a no-op. */
		xor_forced = true;
		return 0;
	}

#ifdef MODULE
	/* A module loads late enough that calibrating here is fine. */
	return calibrate_xor_blocks();
#else
	/*
	 * Pick the first template as the temporary default until calibration
	 * happens.
	 */
	active_template = template_list;
	return 0;
#endif
}
181 
/*
 * There is no state to tear down; the empty exit handler exists so the
 * module can be unloaded.
 */
static __exit void xor_exit(void)
{
}
185 
186 MODULE_DESCRIPTION("RAID-5 checksumming functions");
187 MODULE_LICENSE("GPL");
188 
189 /*
190  * When built-in we must register the default template before md, but we don't
191  * want calibration to run that early as that would delay the boot process.
192  */
193 #ifndef MODULE
194 __initcall(calibrate_xor_blocks);
195 #endif
196 core_initcall(xor_init);
197 module_exit(xor_exit);
198