1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 1996, 1997, 1998, 1999, 2000, 4 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. 5 * 6 * Dispatch optimized XOR parity functions. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/gfp.h> 11 #include <linux/raid/xor.h> 12 #include <linux/raid/xor_impl.h> 13 #include <linux/jiffies.h> 14 #include <linux/preempt.h> 15 #include <asm/xor.h> 16 17 #ifndef XOR_SELECT_TEMPLATE 18 #define XOR_SELECT_TEMPLATE(x) (x) 19 #endif 20 21 /* The xor routines to use. */ 22 static struct xor_block_template *active_template; 23 24 void 25 xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) 26 { 27 unsigned long *p1, *p2, *p3, *p4; 28 29 WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count()); 30 31 p1 = (unsigned long *) srcs[0]; 32 if (src_count == 1) { 33 active_template->do_2(bytes, dest, p1); 34 return; 35 } 36 37 p2 = (unsigned long *) srcs[1]; 38 if (src_count == 2) { 39 active_template->do_3(bytes, dest, p1, p2); 40 return; 41 } 42 43 p3 = (unsigned long *) srcs[2]; 44 if (src_count == 3) { 45 active_template->do_4(bytes, dest, p1, p2, p3); 46 return; 47 } 48 49 p4 = (unsigned long *) srcs[3]; 50 active_template->do_5(bytes, dest, p1, p2, p3, p4); 51 } 52 EXPORT_SYMBOL(xor_blocks); 53 54 /* Set of all registered templates. */ 55 static struct xor_block_template *__initdata template_list; 56 static bool __initdata xor_forced = false; 57 58 static void __init do_xor_register(struct xor_block_template *tmpl) 59 { 60 tmpl->next = template_list; 61 template_list = tmpl; 62 } 63 64 #define BENCH_SIZE 4096 65 #define REPS 800U 66 67 static void __init 68 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) 69 { 70 int speed; 71 unsigned long reps; 72 ktime_t min, start, t0; 73 74 preempt_disable(); 75 76 reps = 0; 77 t0 = ktime_get(); 78 /* delay start until time has advanced */ 79 while ((start = ktime_get()) == t0) 80 cpu_relax(); 81 do { 82 mb(); /* prevent loop optimization */ 83 tmpl->do_2(BENCH_SIZE, b1, b2); 84 mb(); 85 } while (reps++ < REPS || (t0 = ktime_get()) == start); 86 min = ktime_sub(t0, start); 87 88 preempt_enable(); 89 90 // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] 91 speed = (1000 * reps * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); 92 tmpl->speed = speed; 93 94 pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed); 95 } 96 97 static int __init calibrate_xor_blocks(void) 98 { 99 void *b1, *b2; 100 struct xor_block_template *f, *fastest; 101 102 if (xor_forced) 103 return 0; 104 105 b1 = (void *) __get_free_pages(GFP_KERNEL, 2); 106 if (!b1) { 107 pr_warn("xor: Yikes! No memory available.\n"); 108 return -ENOMEM; 109 } 110 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 111 112 pr_info("xor: measuring software checksum speed\n"); 113 fastest = template_list; 114 for (f = template_list; f; f = f->next) { 115 do_xor_speed(f, b1, b2); 116 if (f->speed > fastest->speed) 117 fastest = f; 118 } 119 active_template = fastest; 120 pr_info("xor: using function: %s (%d MB/sec)\n", 121 fastest->name, fastest->speed); 122 123 free_pages((unsigned long)b1, 2); 124 return 0; 125 } 126 127 static int __init xor_init(void) 128 { 129 /* 130 * If this arch/cpu has a short-circuited selection, don't loop through 131 * all the possible functions, just use the best one. 132 */ 133 active_template = XOR_SELECT_TEMPLATE(NULL); 134 if (active_template) { 135 pr_info("xor: automatically using best checksumming function %-10s\n", 136 active_template->name); 137 xor_forced = true; 138 return 0; 139 } 140 141 #define xor_speed do_xor_register 142 XOR_TRY_TEMPLATES; 143 #undef xor_speed 144 145 #ifdef MODULE 146 return calibrate_xor_blocks(); 147 #else 148 /* 149 * Pick the first template as the temporary default until calibration 150 * happens. 151 */ 152 active_template = template_list; 153 return 0; 154 #endif 155 } 156 157 static __exit void xor_exit(void) 158 { 159 } 160 161 MODULE_DESCRIPTION("RAID-5 checksumming functions"); 162 MODULE_LICENSE("GPL"); 163 164 /* 165 * When built-in we must register the default template before md, but we don't 166 * want calibration to run that early as that would delay the boot process. 167 */ 168 #ifndef MODULE 169 __initcall(calibrate_xor_blocks); 170 #endif 171 core_initcall(xor_init); 172 module_exit(xor_exit); 173