xref: /linux/lib/raid/xor/xor-core.c (revision 440d6635b20037bc9ad46b20817d7b61cef0fc1b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 1996, 1997, 1998, 1999, 2000,
4  * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
5  *
6  * Dispatch optimized XOR parity functions.
7  */
8 
9 #include <linux/module.h>
10 #include <linux/gfp.h>
11 #include <linux/raid/xor.h>
12 #include <linux/jiffies.h>
13 #include <linux/preempt.h>
14 #include <linux/static_call.h>
15 #include "xor_impl.h"
16 
/*
 * Static call that dispatches xor_gen() to the selected implementation.
 * Starts out NULL and is pointed at a real implementation during init;
 * the prototype is taken from the generic 8-register template.
 */
DEFINE_STATIC_CALL_NULL(xor_gen_impl, *xor_block_8regs.xor_gen);
18 
19 /**
20  * xor_gen - generate RAID-style XOR information
21  * @dest:	destination vector
22  * @srcs:	source vectors
23  * @src_cnt:	number of source vectors
24  * @bytes:	length in bytes of each vector
25  *
26  * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors
27  * in @srcs for a length of @bytes bytes.  @src_cnt must be non-zero, and the
28  * memory pointed to by @dest and each member of @srcs must be at least 64-byte
29  * aligned.  @bytes must be non-zero and a multiple of 512.
30  *
31  * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with
32  * the first disk, which then needs to be removed from @srcs.
33  */
34 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
35 {
36 	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
37 	WARN_ON_ONCE(bytes == 0);
38 	WARN_ON_ONCE(bytes & 511);
39 
40 	static_call(xor_gen_impl)(dest, srcs, src_cnt, bytes);
41 }
42 EXPORT_SYMBOL(xor_gen);
43 
/* Set of all registered templates.  */
static struct xor_block_template *__initdata template_list;
/* Template forced via xor_force(); when set, benchmark selection is skipped. */
static struct xor_block_template *forced_template;
47 
48 /**
49  * xor_register - register a XOR template
50  * @tmpl:	template to register
51  *
52  * Register a XOR implementation with the core.  Registered implementations
53  * will be measured by a trivial benchmark, and the fastest one is chosen
54  * unless an implementation is forced using xor_force().
55  */
56 void __init xor_register(struct xor_block_template *tmpl)
57 {
58 	tmpl->next = template_list;
59 	template_list = tmpl;
60 }
61 
/**
 * xor_force - force use of a XOR template
 * @tmpl:	template to force
 *
 * Register a XOR implementation with the core and force using it.  Forcing
 * an implementation will make the core ignore any template registered using
 * xor_register(), or any previous implementation forced using xor_force().
 */
void __init xor_force(struct xor_block_template *tmpl)
{
	forced_template = tmpl;
}
74 
#define BENCH_SIZE	4096	/* bytes XORed per benchmark iteration */
#define REPS		800U	/* minimum iterations per measurement */
77 
/*
 * Benchmark a single template: XOR b2 into b1 repeatedly, derive a
 * throughput figure in MB/s, store it in tmpl->speed and log it.
 */
static void __init
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
	int speed;
	unsigned long reps;
	ktime_t min, start, t0;
	void *srcs[1] = { b2 };

	/* Keep the whole measurement on one CPU, uninterrupted. */
	preempt_disable();

	reps = 0;
	t0 = ktime_get();
	/* delay start until time has advanced */
	while ((start = ktime_get()) == t0)
		cpu_relax();
	/*
	 * Run at least REPS iterations, then keep going until the clock has
	 * visibly advanced past @start so the elapsed time is non-zero.
	 * Note reps is post-incremented in the condition, so it ends up
	 * holding the actual number of iterations performed.
	 */
	do {
		mb(); /* prevent loop optimization */
		tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE);
		mb();
	} while (reps++ < REPS || (t0 = ktime_get()) == start);
	/* Total elapsed time of the run (the name 'min' is historical). */
	min = ktime_sub(t0, start);

	preempt_enable();

	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
	speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
	tmpl->speed = speed;

	pr_info("   %-16s: %5d MB/sec\n", tmpl->name, speed);
}
108 
109 static int __init calibrate_xor_blocks(void)
110 {
111 	void *b1, *b2;
112 	struct xor_block_template *f, *fastest;
113 
114 	if (forced_template)
115 		return 0;
116 
117 	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
118 	if (!b1) {
119 		pr_warn("xor: Yikes!  No memory available.\n");
120 		return -ENOMEM;
121 	}
122 	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
123 
124 	pr_info("xor: measuring software checksum speed\n");
125 	fastest = template_list;
126 	for (f = template_list; f; f = f->next) {
127 		do_xor_speed(f, b1, b2);
128 		if (f->speed > fastest->speed)
129 			fastest = f;
130 	}
131 	static_call_update(xor_gen_impl, fastest->xor_gen);
132 	pr_info("xor: using function: %s (%d MB/sec)\n",
133 	       fastest->name, fastest->speed);
134 
135 	free_pages((unsigned long)b1, 2);
136 	return 0;
137 }
138 
#ifdef CONFIG_XOR_BLOCKS_ARCH
#include "xor_arch.h" /* $SRCARCH/xor_arch.h */
#else
/*
 * Generic fallback: register the portable C templates when the
 * architecture does not provide its own arch_xor_init().
 */
static void __init arch_xor_init(void)
{
	xor_register(&xor_block_8regs);
	xor_register(&xor_block_8regs_p);
	xor_register(&xor_block_32regs);
	xor_register(&xor_block_32regs_p);
}
#endif /* CONFIG_XOR_BLOCKS_ARCH */
150 
/*
 * Early init: collect the architecture's templates and make sure the
 * xor_gen_impl static call points at something usable before any caller
 * (e.g. md) can issue a xor_gen().
 */
static int __init xor_init(void)
{
	/* Let the architecture register (or force) its templates. */
	arch_xor_init();

	/*
	 * If this arch/cpu has a short-circuited selection, don't loop through
	 * all the possible functions, just use the best one.
	 */
	if (forced_template) {
		pr_info("xor: automatically using best checksumming function   %-10s\n",
			forced_template->name);
		static_call_update(xor_gen_impl, forced_template->xor_gen);
		return 0;
	}

#ifdef MODULE
	/* Modular build: calibrate immediately, no later initcall will run. */
	return calibrate_xor_blocks();
#else
	/*
	 * Pick the first template as the temporary default until calibration
	 * happens.
	 */
	static_call_update(xor_gen_impl, template_list->xor_gen);
	return 0;
#endif
}
177 
/* Empty exit handler; there is no per-module state to tear down. */
static __exit void xor_exit(void)
{
}
181 
MODULE_DESCRIPTION("RAID-5 checksumming functions");
MODULE_LICENSE("GPL");

/*
 * When built-in we must register the default template before md, but we don't
 * want calibration to run that early as that would delay the boot process.
 * xor_init() therefore runs at core_initcall time and installs a temporary
 * default, while calibration is deferred to a later __initcall level.
 */
#ifndef MODULE
__initcall(calibrate_xor_blocks);
#endif
core_initcall(xor_init);
module_exit(xor_exit);
194