1 // SPDX-License-Identifier: GPL-2.0-or-later 2 #include <linux/prefetch.h> 3 #include "xor_impl.h" 4 5 static void 6 xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1, 7 const unsigned long * __restrict p2) 8 { 9 long lines = bytes / (sizeof (long)) / 8 - 1; 10 11 prefetchw(p1); 12 prefetch(p2); 13 14 do { 15 register long d0, d1, d2, d3, d4, d5, d6, d7; 16 17 prefetchw(p1+8); 18 prefetch(p2+8); 19 once_more: 20 d0 = p1[0]; /* Pull the stuff into registers */ 21 d1 = p1[1]; /* ... in bursts, if possible. */ 22 d2 = p1[2]; 23 d3 = p1[3]; 24 d4 = p1[4]; 25 d5 = p1[5]; 26 d6 = p1[6]; 27 d7 = p1[7]; 28 d0 ^= p2[0]; 29 d1 ^= p2[1]; 30 d2 ^= p2[2]; 31 d3 ^= p2[3]; 32 d4 ^= p2[4]; 33 d5 ^= p2[5]; 34 d6 ^= p2[6]; 35 d7 ^= p2[7]; 36 p1[0] = d0; /* Store the result (in bursts) */ 37 p1[1] = d1; 38 p1[2] = d2; 39 p1[3] = d3; 40 p1[4] = d4; 41 p1[5] = d5; 42 p1[6] = d6; 43 p1[7] = d7; 44 p1 += 8; 45 p2 += 8; 46 } while (--lines > 0); 47 if (lines == 0) 48 goto once_more; 49 } 50 51 static void 52 xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1, 53 const unsigned long * __restrict p2, 54 const unsigned long * __restrict p3) 55 { 56 long lines = bytes / (sizeof (long)) / 8 - 1; 57 58 prefetchw(p1); 59 prefetch(p2); 60 prefetch(p3); 61 62 do { 63 register long d0, d1, d2, d3, d4, d5, d6, d7; 64 65 prefetchw(p1+8); 66 prefetch(p2+8); 67 prefetch(p3+8); 68 once_more: 69 d0 = p1[0]; /* Pull the stuff into registers */ 70 d1 = p1[1]; /* ... in bursts, if possible. */ 71 d2 = p1[2]; 72 d3 = p1[3]; 73 d4 = p1[4]; 74 d5 = p1[5]; 75 d6 = p1[6]; 76 d7 = p1[7]; 77 d0 ^= p2[0]; 78 d1 ^= p2[1]; 79 d2 ^= p2[2]; 80 d3 ^= p2[3]; 81 d4 ^= p2[4]; 82 d5 ^= p2[5]; 83 d6 ^= p2[6]; 84 d7 ^= p2[7]; 85 d0 ^= p3[0]; 86 d1 ^= p3[1]; 87 d2 ^= p3[2]; 88 d3 ^= p3[3]; 89 d4 ^= p3[4]; 90 d5 ^= p3[5]; 91 d6 ^= p3[6]; 92 d7 ^= p3[7]; 93 p1[0] = d0; /* Store the result (in bursts) */ 94 p1[1] = d1; 95 p1[2] = d2; 96 p1[3] = d3; 97 p1[4] = d4; 98 p1[5] = d5; 99 p1[6] = d6; 100 p1[7] = d7; 101 p1 += 8; 102 p2 += 8; 103 p3 += 8; 104 } while (--lines > 0); 105 if (lines == 0) 106 goto once_more; 107 } 108 109 static void 110 xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1, 111 const unsigned long * __restrict p2, 112 const unsigned long * __restrict p3, 113 const unsigned long * __restrict p4) 114 { 115 long lines = bytes / (sizeof (long)) / 8 - 1; 116 117 prefetchw(p1); 118 prefetch(p2); 119 prefetch(p3); 120 prefetch(p4); 121 122 do { 123 register long d0, d1, d2, d3, d4, d5, d6, d7; 124 125 prefetchw(p1+8); 126 prefetch(p2+8); 127 prefetch(p3+8); 128 prefetch(p4+8); 129 once_more: 130 d0 = p1[0]; /* Pull the stuff into registers */ 131 d1 = p1[1]; /* ... in bursts, if possible. */ 132 d2 = p1[2]; 133 d3 = p1[3]; 134 d4 = p1[4]; 135 d5 = p1[5]; 136 d6 = p1[6]; 137 d7 = p1[7]; 138 d0 ^= p2[0]; 139 d1 ^= p2[1]; 140 d2 ^= p2[2]; 141 d3 ^= p2[3]; 142 d4 ^= p2[4]; 143 d5 ^= p2[5]; 144 d6 ^= p2[6]; 145 d7 ^= p2[7]; 146 d0 ^= p3[0]; 147 d1 ^= p3[1]; 148 d2 ^= p3[2]; 149 d3 ^= p3[3]; 150 d4 ^= p3[4]; 151 d5 ^= p3[5]; 152 d6 ^= p3[6]; 153 d7 ^= p3[7]; 154 d0 ^= p4[0]; 155 d1 ^= p4[1]; 156 d2 ^= p4[2]; 157 d3 ^= p4[3]; 158 d4 ^= p4[4]; 159 d5 ^= p4[5]; 160 d6 ^= p4[6]; 161 d7 ^= p4[7]; 162 p1[0] = d0; /* Store the result (in bursts) */ 163 p1[1] = d1; 164 p1[2] = d2; 165 p1[3] = d3; 166 p1[4] = d4; 167 p1[5] = d5; 168 p1[6] = d6; 169 p1[7] = d7; 170 p1 += 8; 171 p2 += 8; 172 p3 += 8; 173 p4 += 8; 174 } while (--lines > 0); 175 if (lines == 0) 176 goto once_more; 177 } 178 179 static void 180 xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1, 181 const unsigned long * __restrict p2, 182 const unsigned long * __restrict p3, 183 const unsigned long * __restrict p4, 184 const unsigned long * __restrict p5) 185 { 186 long lines = bytes / (sizeof (long)) / 8 - 1; 187 188 prefetchw(p1); 189 prefetch(p2); 190 prefetch(p3); 191 prefetch(p4); 192 prefetch(p5); 193 194 do { 195 register long d0, d1, d2, d3, d4, d5, d6, d7; 196 197 prefetchw(p1+8); 198 prefetch(p2+8); 199 prefetch(p3+8); 200 prefetch(p4+8); 201 prefetch(p5+8); 202 once_more: 203 d0 = p1[0]; /* Pull the stuff into registers */ 204 d1 = p1[1]; /* ... in bursts, if possible. */ 205 d2 = p1[2]; 206 d3 = p1[3]; 207 d4 = p1[4]; 208 d5 = p1[5]; 209 d6 = p1[6]; 210 d7 = p1[7]; 211 d0 ^= p2[0]; 212 d1 ^= p2[1]; 213 d2 ^= p2[2]; 214 d3 ^= p2[3]; 215 d4 ^= p2[4]; 216 d5 ^= p2[5]; 217 d6 ^= p2[6]; 218 d7 ^= p2[7]; 219 d0 ^= p3[0]; 220 d1 ^= p3[1]; 221 d2 ^= p3[2]; 222 d3 ^= p3[3]; 223 d4 ^= p3[4]; 224 d5 ^= p3[5]; 225 d6 ^= p3[6]; 226 d7 ^= p3[7]; 227 d0 ^= p4[0]; 228 d1 ^= p4[1]; 229 d2 ^= p4[2]; 230 d3 ^= p4[3]; 231 d4 ^= p4[4]; 232 d5 ^= p4[5]; 233 d6 ^= p4[6]; 234 d7 ^= p4[7]; 235 d0 ^= p5[0]; 236 d1 ^= p5[1]; 237 d2 ^= p5[2]; 238 d3 ^= p5[3]; 239 d4 ^= p5[4]; 240 d5 ^= p5[5]; 241 d6 ^= p5[6]; 242 d7 ^= p5[7]; 243 p1[0] = d0; /* Store the result (in bursts) */ 244 p1[1] = d1; 245 p1[2] = d2; 246 p1[3] = d3; 247 p1[4] = d4; 248 p1[5] = d5; 249 p1[6] = d6; 250 p1[7] = d7; 251 p1 += 8; 252 p2 += 8; 253 p3 += 8; 254 p4 += 8; 255 p5 += 8; 256 } while (--lines > 0); 257 if (lines == 0) 258 goto once_more; 259 } 260 261 DO_XOR_BLOCKS(32regs_p, xor_32regs_p_2, xor_32regs_p_3, xor_32regs_p_4, 262 xor_32regs_p_5); 263 264 struct xor_block_template xor_block_32regs_p = { 265 .name = "32regs_prefetch", 266 .xor_gen = xor_gen_32regs_p, 267 }; 268