1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 1996, 1997, 1998, 1999, 2000, 4 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. 5 * 6 * Dispatch optimized XOR parity functions. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/gfp.h> 11 #include <linux/slab.h> 12 #include <linux/raid/xor.h> 13 #include <linux/jiffies.h> 14 #include <linux/preempt.h> 15 #include <linux/static_call.h> 16 #include "xor_impl.h" 17 18 DEFINE_STATIC_CALL_NULL(xor_gen_impl, *xor_block_8regs.xor_gen); 19 20 /** 21 * xor_gen - generate RAID-style XOR information 22 * @dest: destination vector 23 * @srcs: source vectors 24 * @src_cnt: number of source vectors 25 * @bytes: length in bytes of each vector 26 * 27 * Performs bit-wise XOR operation into @dest for each of the @src_cnt vectors 28 * in @srcs for a length of @bytes bytes. @src_cnt must be non-zero, and the 29 * memory pointed to by @dest and each member of @srcs must be at least 64-byte 30 * aligned. @bytes must be non-zero and a multiple of 512. 31 * 32 * Note: for typical RAID uses, @dest either needs to be zeroed, or filled with 33 * the first disk, which then needs to be removed from @srcs. 34 */ 35 void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes) 36 { 37 WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); 38 WARN_ON_ONCE(bytes == 0); 39 WARN_ON_ONCE(bytes & 511); 40 41 static_call(xor_gen_impl)(dest, srcs, src_cnt, bytes); 42 } 43 EXPORT_SYMBOL(xor_gen); 44 45 /* Set of all registered templates. */ 46 static struct xor_block_template *__initdata template_list; 47 static struct xor_block_template *forced_template; 48 49 /** 50 * xor_register - register a XOR template 51 * @tmpl: template to register 52 * 53 * Register a XOR implementation with the core. Registered implementations 54 * will be measured by a trivial benchmark, and the fastest one is chosen 55 * unless an implementation is forced using xor_force(). 56 */ 57 void __init xor_register(struct xor_block_template *tmpl) 58 { 59 tmpl->next = template_list; 60 template_list = tmpl; 61 } 62 63 /** 64 * xor_force - force use of a XOR template 65 * @tmpl: template to register 66 * 67 * Register a XOR implementation with the core and force using it. Forcing 68 * an implementation will make the core ignore any template registered using 69 * xor_register(), or any previous implementation forced using xor_force(). 70 */ 71 void __init xor_force(struct xor_block_template *tmpl) 72 { 73 forced_template = tmpl; 74 } 75 76 #define BENCH_SIZE 4096 77 #define REPS 800U 78 79 static void __init 80 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) 81 { 82 int speed; 83 unsigned long reps; 84 ktime_t min, start, t0; 85 void *srcs[1] = { b2 }; 86 87 preempt_disable(); 88 89 reps = 0; 90 t0 = ktime_get(); 91 /* delay start until time has advanced */ 92 while ((start = ktime_get()) == t0) 93 cpu_relax(); 94 do { 95 mb(); /* prevent loop optimization */ 96 tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE); 97 mb(); 98 } while (reps++ < REPS || (t0 = ktime_get()) == start); 99 min = ktime_sub(t0, start); 100 101 preempt_enable(); 102 103 // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] 104 speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); 105 tmpl->speed = speed; 106 107 pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); 108 } 109 110 static int __init calibrate_xor_blocks(void) 111 { 112 void *b1, *b2; 113 struct xor_block_template *f, *fastest; 114 115 if (forced_template) 116 return 0; 117 118 b1 = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); 119 if (!b1) { 120 pr_warn("xor: Yikes! No memory available.\n"); 121 return -ENOMEM; 122 } 123 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 124 125 pr_info("xor: measuring software checksum speed\n"); 126 fastest = template_list; 127 for (f = template_list; f; f = f->next) { 128 do_xor_speed(f, b1, b2); 129 if (f->speed > fastest->speed) 130 fastest = f; 131 } 132 static_call_update(xor_gen_impl, fastest->xor_gen); 133 pr_info("xor: using function: %s (%d MB/sec)\n", 134 fastest->name, fastest->speed); 135 136 kfree(b1); 137 return 0; 138 } 139 140 #ifdef CONFIG_XOR_BLOCKS_ARCH 141 #include "xor_arch.h" /* $SRCARCH/xor_arch.h */ 142 #else 143 static void __init arch_xor_init(void) 144 { 145 xor_register(&xor_block_8regs); 146 xor_register(&xor_block_8regs_p); 147 xor_register(&xor_block_32regs); 148 xor_register(&xor_block_32regs_p); 149 } 150 #endif /* CONFIG_XOR_BLOCKS_ARCH */ 151 152 static int __init xor_init(void) 153 { 154 arch_xor_init(); 155 156 /* 157 * If this arch/cpu has a short-circuited selection, don't loop through 158 * all the possible functions, just use the best one. 159 */ 160 if (forced_template) { 161 pr_info("xor: automatically using best checksumming function %-10s\n", 162 forced_template->name); 163 static_call_update(xor_gen_impl, forced_template->xor_gen); 164 return 0; 165 } 166 167 #ifdef MODULE 168 return calibrate_xor_blocks(); 169 #else 170 /* 171 * Pick the first template as the temporary default until calibration 172 * happens. 173 */ 174 static_call_update(xor_gen_impl, template_list->xor_gen); 175 return 0; 176 #endif 177 } 178 179 static __exit void xor_exit(void) 180 { 181 } 182 183 MODULE_DESCRIPTION("RAID-5 checksumming functions"); 184 MODULE_LICENSE("GPL"); 185 186 /* 187 * When built-in we must register the default template before md, but we don't 188 * want calibration to run that early as that would delay the boot process. 189 */ 190 #ifndef MODULE 191 __initcall(calibrate_xor_blocks); 192 #endif 193 core_initcall(xor_init); 194 module_exit(xor_exit); 195