xref: /linux/lib/raid/xor/xor-core.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 1996, 1997, 1998, 1999, 2000,
4  * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
5  *
6  * Dispatch optimized XOR parity functions.
7  */
8 
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/preempt.h>
#include <linux/static_call.h>
#include "xor_impl.h"
17 
18 DEFINE_STATIC_CALL_NULL(xor_gen_impl, *xor_block_8regs.xor_gen);
19 
20 /**
21  * xor_gen - generate RAID-style XOR information
22  * @dest:	destination vector
23  * @srcs:	source vectors
24  * @src_cnt:	number of source vectors
25  * @bytes:	length in bytes of each vector
26  *
27  * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors
28  * in @srcs for a length of @bytes bytes.  @src_cnt must be non-zero, and the
29  * memory pointed to by @dest and each member of @srcs must be at least 64-byte
30  * aligned.  @bytes must be non-zero and a multiple of 512.
31  *
32  * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with
33  * the first disk, which then needs to be removed from @srcs.
34  */
35 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
36 {
37 	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
38 	WARN_ON_ONCE(bytes == 0);
39 	WARN_ON_ONCE(bytes & 511);
40 
41 	static_call(xor_gen_impl)(dest, srcs, src_cnt, bytes);
42 }
43 EXPORT_SYMBOL(xor_gen);
44 
45 /* Set of all registered templates.  */
46 static struct xor_block_template *__initdata template_list;
47 static struct xor_block_template *forced_template;
48 
49 /**
50  * xor_register - register a XOR template
51  * @tmpl:	template to register
52  *
53  * Register a XOR implementation with the core.  Registered implementations
54  * will be measured by a trivial benchmark, and the fastest one is chosen
55  * unless an implementation is forced using xor_force().
56  */
57 void __init xor_register(struct xor_block_template *tmpl)
58 {
59 	tmpl->next = template_list;
60 	template_list = tmpl;
61 }
62 
63 /**
64  * xor_force - force use of a XOR template
65  * @tmpl:	template to register
66  *
67  * Register a XOR implementation with the core and force using it.  Forcing
68  * an implementation will make the core ignore any template registered using
69  * xor_register(), or any previous implementation forced using xor_force().
70  */
71 void __init xor_force(struct xor_block_template *tmpl)
72 {
73 	forced_template = tmpl;
74 }
75 
76 #define BENCH_SIZE	4096
77 #define REPS		800U
78 
79 static void __init
80 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
81 {
82 	int speed;
83 	unsigned long reps;
84 	ktime_t min, start, t0;
85 	void *srcs[1] = { b2 };
86 
87 	preempt_disable();
88 
89 	reps = 0;
90 	t0 = ktime_get();
91 	/* delay start until time has advanced */
92 	while ((start = ktime_get()) == t0)
93 		cpu_relax();
94 	do {
95 		mb(); /* prevent loop optimization */
96 		tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE);
97 		mb();
98 	} while (reps++ < REPS || (t0 = ktime_get()) == start);
99 	min = ktime_sub(t0, start);
100 
101 	preempt_enable();
102 
103 	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
104 	speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
105 	tmpl->speed = speed;
106 
107 	pr_info("   %-16s: %5d MB/sec\n", tmpl->name, speed);
108 }
109 
110 static int __init calibrate_xor_blocks(void)
111 {
112 	void *b1, *b2;
113 	struct xor_block_template *f, *fastest;
114 
115 	if (forced_template)
116 		return 0;
117 
118 	b1 = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
119 	if (!b1) {
120 		pr_warn("xor: Yikes!  No memory available.\n");
121 		return -ENOMEM;
122 	}
123 	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
124 
125 	pr_info("xor: measuring software checksum speed\n");
126 	fastest = template_list;
127 	for (f = template_list; f; f = f->next) {
128 		do_xor_speed(f, b1, b2);
129 		if (f->speed > fastest->speed)
130 			fastest = f;
131 	}
132 	static_call_update(xor_gen_impl, fastest->xor_gen);
133 	pr_info("xor: using function: %s (%d MB/sec)\n",
134 	       fastest->name, fastest->speed);
135 
136 	kfree(b1);
137 	return 0;
138 }
139 
140 #ifdef CONFIG_XOR_BLOCKS_ARCH
141 #include "xor_arch.h" /* $SRCARCH/xor_arch.h */
142 #else
143 static void __init arch_xor_init(void)
144 {
145 	xor_register(&xor_block_8regs);
146 	xor_register(&xor_block_8regs_p);
147 	xor_register(&xor_block_32regs);
148 	xor_register(&xor_block_32regs_p);
149 }
150 #endif /* CONFIG_XOR_BLOCKS_ARCH */
151 
152 static int __init xor_init(void)
153 {
154 	arch_xor_init();
155 
156 	/*
157 	 * If this arch/cpu has a short-circuited selection, don't loop through
158 	 * all the possible functions, just use the best one.
159 	 */
160 	if (forced_template) {
161 		pr_info("xor: automatically using best checksumming function   %-10s\n",
162 			forced_template->name);
163 		static_call_update(xor_gen_impl, forced_template->xor_gen);
164 		return 0;
165 	}
166 
167 #ifdef MODULE
168 	return calibrate_xor_blocks();
169 #else
170 	/*
171 	 * Pick the first template as the temporary default until calibration
172 	 * happens.
173 	 */
174 	static_call_update(xor_gen_impl, template_list->xor_gen);
175 	return 0;
176 #endif
177 }
178 
179 static __exit void xor_exit(void)
180 {
181 }
182 
183 MODULE_DESCRIPTION("RAID-5 checksumming functions");
184 MODULE_LICENSE("GPL");
185 
186 /*
187  * When built-in we must register the default template before md, but we don't
188  * want calibration to run that early as that would delay the boot process.
189  */
190 #ifndef MODULE
191 __initcall(calibrate_xor_blocks);
192 #endif
193 core_initcall(xor_init);
194 module_exit(xor_exit);
195