// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 1996, 1997, 1998, 1999, 2000,
 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
 *
 * Dispatch optimized XOR parity functions.
 *
 * Implementations register themselves at early init (either the generic
 * ones below or arch-specific ones via xor_arch.h); the fastest one is
 * selected by a short benchmark, unless the architecture forces a
 * specific implementation with xor_force().
 */

#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include "xor_impl.h"

/*
 * The xor routines to use.  Set either by xor_force(), or to a temporary
 * default at core_initcall time, and finally to the benchmark winner by
 * calibrate_xor_blocks().
 */
static struct xor_block_template *active_template;

/**
 * xor_blocks - XOR 1..4 source buffers into a destination buffer
 * @src_count: number of source buffers in @srcs (1 to 4)
 * @bytes: length of each buffer in bytes
 * @dest: destination buffer, XORed in place with the sources
 * @srcs: array of @src_count source buffer pointers
 *
 * Dispatches to the selected template's do_N routine, where N counts the
 * total number of blocks involved including @dest (so src_count == 1 uses
 * do_2, and so on up to do_5).
 *
 * Must be called from process context with interrupts and softirqs
 * enabled; the WARN below enforces that (templates may use SIMD/FPU
 * state that is only safe in that context -- presumably why the check
 * exists; predates this refactor).
 */
void
xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
{
	unsigned long *p1, *p2, *p3, *p4;

	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());

	p1 = (unsigned long *) srcs[0];
	if (src_count == 1) {
		active_template->do_2(bytes, dest, p1);
		return;
	}

	p2 = (unsigned long *) srcs[1];
	if (src_count == 2) {
		active_template->do_3(bytes, dest, p1, p2);
		return;
	}

	p3 = (unsigned long *) srcs[2];
	if (src_count == 3) {
		active_template->do_4(bytes, dest, p1, p2, p3);
		return;
	}

	/* src_count == 4 (callers never pass more). */
	p4 = (unsigned long *) srcs[3];
	active_template->do_5(bytes, dest, p1, p2, p3, p4);
}
EXPORT_SYMBOL(xor_blocks);

/* Set of all registered templates (singly linked, newest first). */
static struct xor_block_template *__initdata template_list;

/* Set once a template is forced / chosen, so calibration is skipped. */
static bool __initdata xor_forced = false;

/**
 * xor_register - register a XOR template
 * @tmpl: template to register
 *
 * Register a XOR implementation with the core. Registered implementations
 * will be measured by a trivial benchmark, and the fastest one is chosen
 * unless an implementation is forced using xor_force().
 */
void __init xor_register(struct xor_block_template *tmpl)
{
	tmpl->next = template_list;
	template_list = tmpl;
}

/**
 * xor_force - force use of a XOR template
 * @tmpl: template to register
 *
 * Register a XOR implementation with the core and force using it. Forcing
 * an implementation will make the core ignore any template registered using
 * xor_register(), or any previous implementation forced using xor_force().
 */
void __init xor_force(struct xor_block_template *tmpl)
{
	active_template = tmpl;
}

#define BENCH_SIZE 4096
#define REPS 800U

/*
 * Benchmark one template: XOR two BENCH_SIZE buffers at least REPS times
 * inside a single timed window and record the achieved MB/s in
 * tmpl->speed.
 */
static void __init
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
	int speed;
	unsigned long reps;
	ktime_t min, start, t0;

	/* Keep the measurement on one CPU and free of preemption noise. */
	preempt_disable();

	reps = 0;
	t0 = ktime_get();
	/* delay start until time has advanced */
	while ((start = ktime_get()) == t0)
		cpu_relax();
	do {
		mb(); /* prevent loop optimization */
		tmpl->do_2(BENCH_SIZE, b1, b2);
		mb();
	} while (reps++ < REPS || (t0 = ktime_get()) == start);
	/*
	 * The loop above runs at least REPS iterations AND until the clock
	 * has advanced past 'start', so the elapsed time below is non-zero.
	 */
	min = ktime_sub(t0, start);

	preempt_enable();

	// bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
	speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
	tmpl->speed = speed;

	pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed);
}

/*
 * Benchmark every registered template and select the fastest one as
 * active_template.  No-op when a template was already forced/selected
 * (xor_forced).  Runs as a device-level initcall when built in (see the
 * bottom of this file), or from xor_init() when built as a module.
 *
 * Returns 0 on success or -ENOMEM if the benchmark buffers cannot be
 * allocated (active_template then keeps its temporary default).
 */
static int __init calibrate_xor_blocks(void)
{
	void *b1, *b2;
	struct xor_block_template *f, *fastest;

	if (xor_forced)
		return 0;

	/* Order-2 allocation: 4 pages hold both benchmark buffers. */
	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
	if (!b1) {
		pr_warn("xor: Yikes! No memory available.\n");
		return -ENOMEM;
	}
	/*
	 * Place the source buffer at this offset within the allocation --
	 * presumably chosen to avoid cache aliasing with b1; predates this
	 * refactor.
	 */
	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;

	pr_info("xor: measuring software checksum speed\n");
	fastest = template_list;
	for (f = template_list; f; f = f->next) {
		do_xor_speed(f, b1, b2);
		if (f->speed > fastest->speed)
			fastest = f;
	}
	active_template = fastest;
	pr_info("xor: using function: %s (%d MB/sec)\n",
		fastest->name, fastest->speed);

	free_pages((unsigned long)b1, 2);
	return 0;
}

#ifdef CONFIG_XOR_BLOCKS_ARCH
#include "xor_arch.h" /* $SRCARCH/xor_arch.h */
#else
/* Generic fallback: register the portable C templates. */
static void __init arch_xor_init(void)
{
	xor_register(&xor_block_8regs);
	xor_register(&xor_block_8regs_p);
	xor_register(&xor_block_32regs);
	xor_register(&xor_block_32regs_p);
}
#endif /* CONFIG_XOR_BLOCKS_ARCH */

/*
 * Early init: let the architecture register (or force) its templates, and
 * make sure active_template is usable before any caller of xor_blocks().
 */
static int __init xor_init(void)
{
	arch_xor_init();

	/*
	 * If this arch/cpu has a short-circuited selection, don't loop through
	 * all the possible functions, just use the best one.
	 */
	if (active_template) {
		pr_info("xor: automatically using best checksumming function %-10s\n",
			active_template->name);
		xor_forced = true;
		return 0;
	}

#ifdef MODULE
	/* As a module there is no later initcall; calibrate right away. */
	return calibrate_xor_blocks();
#else
	/*
	 * Pick the first template as the temporary default until calibration
	 * happens.
	 */
	active_template = template_list;
	return 0;
#endif
}

static __exit void xor_exit(void)
{
}

MODULE_DESCRIPTION("RAID-5 checksumming functions");
MODULE_LICENSE("GPL");

/*
 * When built-in we must register the default template before md, but we don't
 * want calibration to run that early as that would delay the boot process.
 * xor_init() runs at core_initcall level; calibration follows at the default
 * device initcall level.
 */
#ifndef MODULE
__initcall(calibrate_xor_blocks);
#endif
core_initcall(xor_init);
module_exit(xor_exit);