// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 1996, 1997, 1998, 1999, 2000,
 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
 *
 * Dispatch optimized XOR parity functions.
 */

#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include <linux/static_call.h>
#include "xor_impl.h"

/*
 * Dispatch point for the chosen XOR implementation.  Declared NULL, so a
 * target must be installed (xor_init()/calibrate_xor_blocks()) before
 * xor_gen() may be called.  The xor_block_8regs member is used here only
 * to derive the function type for the static call.
 */
DEFINE_STATIC_CALL_NULL(xor_gen_impl, *xor_block_8regs.xor_gen);

/**
 * xor_gen - generate RAID-style XOR information
 * @dest: destination vector
 * @srcs: source vectors
 * @src_cnt: number of source vectors
 * @bytes: length in bytes of each vector
 *
 * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors
 * in @srcs for a length of @bytes bytes. @src_cnt must be non-zero, and the
 * memory pointed to by @dest and each member of @srcs must be at least 64-byte
 * aligned. @bytes must be non-zero and a multiple of 512.
 *
 * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with
 * the first disk, which then needs to be removed from @srcs.
 */
void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
{
	/*
	 * Only preemptible task context is accepted.  NOTE(review):
	 * presumably because some registered implementations use SIMD
	 * state that is unavailable in atomic/IRQ context -- confirm
	 * against the templates an architecture registers.
	 */
	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
	WARN_ON_ONCE(bytes == 0);
	/* @bytes must be a multiple of 512 per the contract above. */
	WARN_ON_ONCE(bytes & 511);

	static_call(xor_gen_impl)(dest, srcs, src_cnt, bytes);
}
EXPORT_SYMBOL(xor_gen);

/* Set of all registered templates. */
static struct xor_block_template *__initdata template_list;
/* Template installed via xor_force(); when set, benchmarking is skipped. */
static struct xor_block_template *forced_template;

/**
 * xor_register - register a XOR template
 * @tmpl: template to register
 *
 * Register a XOR implementation with the core. Registered implementations
 * will be measured by a trivial benchmark, and the fastest one is chosen
 * unless an implementation is forced using xor_force().
 */
void __init xor_register(struct xor_block_template *tmpl)
{
	/* Simple head insertion into the singly linked template list. */
	tmpl->next = template_list;
	template_list = tmpl;
}

/**
 * xor_force - force use of a XOR template
 * @tmpl: template to register
 *
 * Register a XOR implementation with the core and force using it. Forcing
 * an implementation will make the core ignore any template registered using
 * xor_register(), or any previous implementation forced using xor_force().
 */
void __init xor_force(struct xor_block_template *tmpl)
{
	forced_template = tmpl;
}

/* Benchmark parameters: bytes XORed per call, and minimum iterations. */
#define BENCH_SIZE 4096
#define REPS 800U

/*
 * Benchmark one template: XOR b2 into b1 repeatedly and record the
 * throughput in MB/s in tmpl->speed.
 */
static void __init
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
	int speed;
	unsigned long reps;
	ktime_t min, start, t0;
	void *srcs[1] = { b2 };

	/* Keep the measurement on one CPU and free of preemption noise. */
	preempt_disable();

	reps = 0;
	t0 = ktime_get();
	/* delay start until time has advanced */
	while ((start = ktime_get()) == t0)
		cpu_relax();
	do {
		mb(); /* prevent loop optimization */
		tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE);
		mb();
	} while (reps++ < REPS || (t0 = ktime_get()) == start);
	/*
	 * At least REPS iterations ran, and the loop also continued until
	 * the clock advanced past @start, so the elapsed time is non-zero.
	 */
	min = ktime_sub(t0, start);

	preempt_enable();

	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
	speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
	tmpl->speed = speed;

	pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed);
}

/*
 * Benchmark all registered templates and switch the static call to the
 * fastest one.  A no-op when an implementation was forced via xor_force().
 */
static int __init calibrate_xor_blocks(void)
{
	void *b1, *b2;
	struct xor_block_template *f, *fastest;

	if (forced_template)
		return 0;

	/* One order-2 (4 page) allocation provides both benchmark buffers. */
	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
	if (!b1) {
		pr_warn("xor: Yikes! No memory available.\n");
		return -ENOMEM;
	}
	/*
	 * Place the source buffer BENCH_SIZE past the midpoint of the
	 * allocation, well clear of the BENCH_SIZE destination at b1.
	 */
	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;

	pr_info("xor: measuring software checksum speed\n");
	fastest = template_list;
	for (f = template_list; f; f = f->next) {
		do_xor_speed(f, b1, b2);
		if (f->speed > fastest->speed)
			fastest = f;
	}
	static_call_update(xor_gen_impl, fastest->xor_gen);
	pr_info("xor: using function: %s (%d MB/sec)\n",
		fastest->name, fastest->speed);

	free_pages((unsigned long)b1, 2);
	return 0;
}

#ifdef CONFIG_XOR_BLOCKS_ARCH
#include "xor_arch.h" /* $SRCARCH/xor_arch.h */
#else
/* Generic fallback: register the portable C implementations. */
static void __init arch_xor_init(void)
{
	xor_register(&xor_block_8regs);
	xor_register(&xor_block_8regs_p);
	xor_register(&xor_block_32regs);
	xor_register(&xor_block_32regs_p);
}
#endif /* CONFIG_XOR_BLOCKS_ARCH */

/*
 * Collect the templates and make sure the static call has a valid target
 * before anyone may invoke xor_gen().
 */
static int __init xor_init(void)
{
	arch_xor_init();

	/*
	 * If this arch/cpu has a short-circuited selection, don't loop through
	 * all the possible functions, just use the best one.
	 */
	if (forced_template) {
		pr_info("xor: automatically using best checksumming function %-10s\n",
			forced_template->name);
		static_call_update(xor_gen_impl, forced_template->xor_gen);
		return 0;
	}

#ifdef MODULE
	/* Modular build: calibrate right away, nothing else will. */
	return calibrate_xor_blocks();
#else
	/*
	 * Pick the first template as the temporary default until calibration
	 * happens.
	 */
	static_call_update(xor_gen_impl, template_list->xor_gen);
	return 0;
#endif
}

/* Nothing to tear down; present so the module can be unloaded. */
static __exit void xor_exit(void)
{
}

MODULE_DESCRIPTION("RAID-5 checksumming functions");
MODULE_LICENSE("GPL");

/*
 * When built-in we must register the default template before md, but we don't
 * want calibration to run that early as that would delay the boot process.
 */
#ifndef MODULE
__initcall(calibrate_xor_blocks);
#endif
core_initcall(xor_init);
module_exit(xor_exit);