1 // SPDX-License-Identifier: GPL-2.0-or-later 2 #include <linux/prefetch.h> 3 #include <linux/raid/xor_impl.h> 4 #include <asm-generic/xor.h> 5 6 static void 7 xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, 8 const unsigned long * __restrict p2) 9 { 10 long lines = bytes / (sizeof (long)) / 8 - 1; 11 12 prefetchw(p1); 13 prefetch(p2); 14 15 do { 16 register long d0, d1, d2, d3, d4, d5, d6, d7; 17 18 prefetchw(p1+8); 19 prefetch(p2+8); 20 once_more: 21 d0 = p1[0]; /* Pull the stuff into registers */ 22 d1 = p1[1]; /* ... in bursts, if possible. */ 23 d2 = p1[2]; 24 d3 = p1[3]; 25 d4 = p1[4]; 26 d5 = p1[5]; 27 d6 = p1[6]; 28 d7 = p1[7]; 29 d0 ^= p2[0]; 30 d1 ^= p2[1]; 31 d2 ^= p2[2]; 32 d3 ^= p2[3]; 33 d4 ^= p2[4]; 34 d5 ^= p2[5]; 35 d6 ^= p2[6]; 36 d7 ^= p2[7]; 37 p1[0] = d0; /* Store the result (in bursts) */ 38 p1[1] = d1; 39 p1[2] = d2; 40 p1[3] = d3; 41 p1[4] = d4; 42 p1[5] = d5; 43 p1[6] = d6; 44 p1[7] = d7; 45 p1 += 8; 46 p2 += 8; 47 } while (--lines > 0); 48 if (lines == 0) 49 goto once_more; 50 } 51 52 static void 53 xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, 54 const unsigned long * __restrict p2, 55 const unsigned long * __restrict p3) 56 { 57 long lines = bytes / (sizeof (long)) / 8 - 1; 58 59 prefetchw(p1); 60 prefetch(p2); 61 prefetch(p3); 62 63 do { 64 register long d0, d1, d2, d3, d4, d5, d6, d7; 65 66 prefetchw(p1+8); 67 prefetch(p2+8); 68 prefetch(p3+8); 69 once_more: 70 d0 = p1[0]; /* Pull the stuff into registers */ 71 d1 = p1[1]; /* ... in bursts, if possible. */ 72 d2 = p1[2]; 73 d3 = p1[3]; 74 d4 = p1[4]; 75 d5 = p1[5]; 76 d6 = p1[6]; 77 d7 = p1[7]; 78 d0 ^= p2[0]; 79 d1 ^= p2[1]; 80 d2 ^= p2[2]; 81 d3 ^= p2[3]; 82 d4 ^= p2[4]; 83 d5 ^= p2[5]; 84 d6 ^= p2[6]; 85 d7 ^= p2[7]; 86 d0 ^= p3[0]; 87 d1 ^= p3[1]; 88 d2 ^= p3[2]; 89 d3 ^= p3[3]; 90 d4 ^= p3[4]; 91 d5 ^= p3[5]; 92 d6 ^= p3[6]; 93 d7 ^= p3[7]; 94 p1[0] = d0; /* Store the result (in bursts) */ 95 p1[1] = d1; 96 p1[2] = d2; 97 p1[3] = d3; 98 p1[4] = d4; 99 p1[5] = d5; 100 p1[6] = d6; 101 p1[7] = d7; 102 p1 += 8; 103 p2 += 8; 104 p3 += 8; 105 } while (--lines > 0); 106 if (lines == 0) 107 goto once_more; 108 } 109 110 static void 111 xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, 112 const unsigned long * __restrict p2, 113 const unsigned long * __restrict p3, 114 const unsigned long * __restrict p4) 115 { 116 long lines = bytes / (sizeof (long)) / 8 - 1; 117 118 prefetchw(p1); 119 prefetch(p2); 120 prefetch(p3); 121 prefetch(p4); 122 123 do { 124 register long d0, d1, d2, d3, d4, d5, d6, d7; 125 126 prefetchw(p1+8); 127 prefetch(p2+8); 128 prefetch(p3+8); 129 prefetch(p4+8); 130 once_more: 131 d0 = p1[0]; /* Pull the stuff into registers */ 132 d1 = p1[1]; /* ... in bursts, if possible. */ 133 d2 = p1[2]; 134 d3 = p1[3]; 135 d4 = p1[4]; 136 d5 = p1[5]; 137 d6 = p1[6]; 138 d7 = p1[7]; 139 d0 ^= p2[0]; 140 d1 ^= p2[1]; 141 d2 ^= p2[2]; 142 d3 ^= p2[3]; 143 d4 ^= p2[4]; 144 d5 ^= p2[5]; 145 d6 ^= p2[6]; 146 d7 ^= p2[7]; 147 d0 ^= p3[0]; 148 d1 ^= p3[1]; 149 d2 ^= p3[2]; 150 d3 ^= p3[3]; 151 d4 ^= p3[4]; 152 d5 ^= p3[5]; 153 d6 ^= p3[6]; 154 d7 ^= p3[7]; 155 d0 ^= p4[0]; 156 d1 ^= p4[1]; 157 d2 ^= p4[2]; 158 d3 ^= p4[3]; 159 d4 ^= p4[4]; 160 d5 ^= p4[5]; 161 d6 ^= p4[6]; 162 d7 ^= p4[7]; 163 p1[0] = d0; /* Store the result (in bursts) */ 164 p1[1] = d1; 165 p1[2] = d2; 166 p1[3] = d3; 167 p1[4] = d4; 168 p1[5] = d5; 169 p1[6] = d6; 170 p1[7] = d7; 171 p1 += 8; 172 p2 += 8; 173 p3 += 8; 174 p4 += 8; 175 } while (--lines > 0); 176 if (lines == 0) 177 goto once_more; 178 } 179 180 static void 181 xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, 182 const unsigned long * __restrict p2, 183 const unsigned long * __restrict p3, 184 const unsigned long * __restrict p4, 185 const unsigned long * __restrict p5) 186 { 187 long lines = bytes / (sizeof (long)) / 8 - 1; 188 189 prefetchw(p1); 190 prefetch(p2); 191 prefetch(p3); 192 prefetch(p4); 193 prefetch(p5); 194 195 do { 196 register long d0, d1, d2, d3, d4, d5, d6, d7; 197 198 prefetchw(p1+8); 199 prefetch(p2+8); 200 prefetch(p3+8); 201 prefetch(p4+8); 202 prefetch(p5+8); 203 once_more: 204 d0 = p1[0]; /* Pull the stuff into registers */ 205 d1 = p1[1]; /* ... in bursts, if possible. */ 206 d2 = p1[2]; 207 d3 = p1[3]; 208 d4 = p1[4]; 209 d5 = p1[5]; 210 d6 = p1[6]; 211 d7 = p1[7]; 212 d0 ^= p2[0]; 213 d1 ^= p2[1]; 214 d2 ^= p2[2]; 215 d3 ^= p2[3]; 216 d4 ^= p2[4]; 217 d5 ^= p2[5]; 218 d6 ^= p2[6]; 219 d7 ^= p2[7]; 220 d0 ^= p3[0]; 221 d1 ^= p3[1]; 222 d2 ^= p3[2]; 223 d3 ^= p3[3]; 224 d4 ^= p3[4]; 225 d5 ^= p3[5]; 226 d6 ^= p3[6]; 227 d7 ^= p3[7]; 228 d0 ^= p4[0]; 229 d1 ^= p4[1]; 230 d2 ^= p4[2]; 231 d3 ^= p4[3]; 232 d4 ^= p4[4]; 233 d5 ^= p4[5]; 234 d6 ^= p4[6]; 235 d7 ^= p4[7]; 236 d0 ^= p5[0]; 237 d1 ^= p5[1]; 238 d2 ^= p5[2]; 239 d3 ^= p5[3]; 240 d4 ^= p5[4]; 241 d5 ^= p5[5]; 242 d6 ^= p5[6]; 243 d7 ^= p5[7]; 244 p1[0] = d0; /* Store the result (in bursts) */ 245 p1[1] = d1; 246 p1[2] = d2; 247 p1[3] = d3; 248 p1[4] = d4; 249 p1[5] = d5; 250 p1[6] = d6; 251 p1[7] = d7; 252 p1 += 8; 253 p2 += 8; 254 p3 += 8; 255 p4 += 8; 256 p5 += 8; 257 } while (--lines > 0); 258 if (lines == 0) 259 goto once_more; 260 } 261 262 struct xor_block_template xor_block_32regs_p = { 263 .name = "32regs_prefetch", 264 .do_2 = xor_32regs_p_2, 265 .do_3 = xor_32regs_p_3, 266 .do_4 = xor_32regs_p_4, 267 .do_5 = xor_32regs_p_5, 268 }; 269