1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 1996, 1997, 1998, 1999, 2000, 4 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. 5 * 6 * Dispatch optimized XOR parity functions. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/gfp.h> 11 #include <linux/raid/xor.h> 12 #include <linux/raid/xor_impl.h> 13 #include <linux/jiffies.h> 14 #include <linux/preempt.h> 15 #include <asm/xor.h> 16 17 /* The xor routines to use. */ 18 static struct xor_block_template *active_template; 19 20 void 21 xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) 22 { 23 unsigned long *p1, *p2, *p3, *p4; 24 25 WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); 26 27 p1 = (unsigned long *) srcs[0]; 28 if (src_count == 1) { 29 active_template->do_2(bytes, dest, p1); 30 return; 31 } 32 33 p2 = (unsigned long *) srcs[1]; 34 if (src_count == 2) { 35 active_template->do_3(bytes, dest, p1, p2); 36 return; 37 } 38 39 p3 = (unsigned long *) srcs[2]; 40 if (src_count == 3) { 41 active_template->do_4(bytes, dest, p1, p2, p3); 42 return; 43 } 44 45 p4 = (unsigned long *) srcs[3]; 46 active_template->do_5(bytes, dest, p1, p2, p3, p4); 47 } 48 EXPORT_SYMBOL(xor_blocks); 49 50 /* Set of all registered templates. */ 51 static struct xor_block_template *__initdata template_list; 52 static bool __initdata xor_forced = false; 53 54 /** 55 * xor_register - register a XOR template 56 * @tmpl: template to register 57 * 58 * Register a XOR implementation with the core. Registered implementations 59 * will be measured by a trivial benchmark, and the fastest one is chosen 60 * unless an implementation is forced using xor_force(). 61 */ 62 void __init xor_register(struct xor_block_template *tmpl) 63 { 64 tmpl->next = template_list; 65 template_list = tmpl; 66 } 67 68 /** 69 * xor_force - force use of a XOR template 70 * @tmpl: template to register 71 * 72 * Register a XOR implementation with the core and force using it. Forcing 73 * an implementation will make the core ignore any template registered using 74 * xor_register(), or any previous implementation forced using xor_force(). 75 */ 76 void __init xor_force(struct xor_block_template *tmpl) 77 { 78 active_template = tmpl; 79 } 80 81 #define BENCH_SIZE 4096 82 #define REPS 800U 83 84 static void __init 85 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) 86 { 87 int speed; 88 unsigned long reps; 89 ktime_t min, start, t0; 90 91 preempt_disable(); 92 93 reps = 0; 94 t0 = ktime_get(); 95 /* delay start until time has advanced */ 96 while ((start = ktime_get()) == t0) 97 cpu_relax(); 98 do { 99 mb(); /* prevent loop optimization */ 100 tmpl->do_2(BENCH_SIZE, b1, b2); 101 mb(); 102 } while (reps++ < REPS || (t0 = ktime_get()) == start); 103 min = ktime_sub(t0, start); 104 105 preempt_enable(); 106 107 // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] 108 speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); 109 tmpl->speed = speed; 110 111 pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); 112 } 113 114 static int __init calibrate_xor_blocks(void) 115 { 116 void *b1, *b2; 117 struct xor_block_template *f, *fastest; 118 119 if (xor_forced) 120 return 0; 121 122 b1 = (void *) __get_free_pages(GFP_KERNEL, 2); 123 if (!b1) { 124 pr_warn("xor: Yikes! No memory available.\n"); 125 return -ENOMEM; 126 } 127 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 128 129 pr_info("xor: measuring software checksum speed\n"); 130 fastest = template_list; 131 for (f = template_list; f; f = f->next) { 132 do_xor_speed(f, b1, b2); 133 if (f->speed > fastest->speed) 134 fastest = f; 135 } 136 active_template = fastest; 137 pr_info("xor: using function: %s (%d MB/sec)\n", 138 fastest->name, fastest->speed); 139 140 free_pages((unsigned long)b1, 2); 141 return 0; 142 } 143 144 static int __init xor_init(void) 145 { 146 #ifdef arch_xor_init 147 arch_xor_init(); 148 #else 149 xor_register(&xor_block_8regs); 150 xor_register(&xor_block_8regs_p); 151 xor_register(&xor_block_32regs); 152 xor_register(&xor_block_32regs_p); 153 #endif 154 155 /* 156 * If this arch/cpu has a short-circuited selection, don't loop through 157 * all the possible functions, just use the best one. 158 */ 159 if (active_template) { 160 pr_info("xor: automatically using best checksumming function %-10s\n", 161 active_template->name); 162 xor_forced = true; 163 return 0; 164 } 165 166 #ifdef MODULE 167 return calibrate_xor_blocks(); 168 #else 169 /* 170 * Pick the first template as the temporary default until calibration 171 * happens. 172 */ 173 active_template = template_list; 174 return 0; 175 #endif 176 } 177 178 static __exit void xor_exit(void) 179 { 180 } 181 182 MODULE_DESCRIPTION("RAID-5 checksumming functions"); 183 MODULE_LICENSE("GPL"); 184 185 /* 186 * When built-in we must register the default template before md, but we don't 187 * want calibration to run that early as that would delay the boot process. 188 */ 189 #ifndef MODULE 190 __initcall(calibrate_xor_blocks); 191 #endif 192 core_initcall(xor_init); 193 module_exit(xor_exit); 194