1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * include/asm-generic/xor.h
4 *
5 * Generic optimized RAID-5 checksumming functions.
6 */
7
8 #include <linux/prefetch.h>
9
/*
 * xor_8regs_2 - XOR one source buffer into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source word
 * @p2: read-only source
 *
 * Data is handled in lines of eight longs.  @bytes is divided down to a
 * whole number of lines, so a trailing partial line is not touched.  The
 * exit test sits at the bottom of the loop, so one line is always
 * processed even when @bytes is shorter than a line.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];

		/* Step every buffer to its next line. */
		p1 += 8;
		p2 += 8;

		if (--remaining <= 0)
			break;
	}
}
29
/*
 * xor_8regs_3 - XOR two source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 *
 * Data is handled in lines of eight longs.  @bytes is divided down to a
 * whole number of lines, so a trailing partial line is not touched.  The
 * exit test sits at the bottom of the loop, so one line is always
 * processed even when @bytes is shorter than a line.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];

		/* Step every buffer to its next line. */
		p1 += 8;
		p2 += 8;
		p3 += 8;

		if (--remaining <= 0)
			break;
	}
}
51
/*
 * xor_8regs_4 - XOR three source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 *
 * Data is handled in lines of eight longs.  @bytes is divided down to a
 * whole number of lines, so a trailing partial line is not touched.  The
 * exit test sits at the bottom of the loop, so one line is always
 * processed even when @bytes is shorter than a line.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];

		/* Step every buffer to its next line. */
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;

		if (--remaining <= 0)
			break;
	}
}
75
/*
 * xor_8regs_5 - XOR four source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 * @p5: read-only source
 *
 * Data is handled in lines of eight longs.  @bytes is divided down to a
 * whole number of lines, so a trailing partial line is not touched.  The
 * exit test sits at the bottom of the loop, so one line is always
 * processed even when @bytes is shorter than a line.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
	    const unsigned long * __restrict p2,
	    const unsigned long * __restrict p3,
	    const unsigned long * __restrict p4,
	    const unsigned long * __restrict p5)
{
	long remaining = bytes / (8 * sizeof(long));

	for (;;) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		/* Step every buffer to its next line. */
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;

		if (--remaining <= 0)
			break;
	}
}
101
/*
 * xor_32regs_2 - XOR one source buffer into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source word
 * @p2: read-only source
 *
 * Each line of eight longs is loaded into eight temporaries, combined
 * with the source there, and then stored back in one burst.  @bytes is
 * divided down to a whole number of lines, so a trailing partial line is
 * not touched.  The loop is bottom-tested: one line is always processed,
 * even when @bytes is shorter than a line.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
138
/*
 * xor_32regs_3 - XOR two source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 *
 * Each line of eight longs is loaded into eight temporaries, combined
 * with one source after the other there, and then stored back in one
 * burst.  @bytes is divided down to a whole number of lines, so a
 * trailing partial line is not touched.  The loop is bottom-tested: one
 * line is always processed, even when @bytes is shorter than a line.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
185
/*
 * xor_32regs_4 - XOR three source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 *
 * Each line of eight longs is loaded into eight temporaries, combined
 * with one source after the other there, and then stored back in one
 * burst.  @bytes is divided down to a whole number of lines, so a
 * trailing partial line is not touched.  The loop is bottom-tested: one
 * line is always processed, even when @bytes is shorter than a line.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
242
/*
 * xor_32regs_5 - XOR four source buffers into a destination buffer.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 * @p5: read-only source
 *
 * Each line of eight longs is loaded into eight temporaries, combined
 * with one source after the other there, and then stored back in one
 * burst.  @bytes is divided down to a whole number of lines, so a
 * trailing partial line is not touched.  The loop is bottom-tested: one
 * line is always processed, even when @bytes is shorter than a line.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
	     const unsigned long * __restrict p2,
	     const unsigned long * __restrict p3,
	     const unsigned long * __restrict p4,
	     const unsigned long * __restrict p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	do {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
309
/*
 * xor_8regs_p_2 - XOR one source buffer into a destination buffer,
 * issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source word
 * @p2: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);

	for (n = 0; n < passes; n++) {
		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}

		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];

		p1 += 8;
		p2 += 8;
	}
}
336
/*
 * xor_8regs_p_3 - XOR two source buffers into a destination buffer,
 * issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	for (n = 0; n < passes; n++) {
		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}

		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
367
/*
 * xor_8regs_p_4 - XOR three source buffers into a destination buffer,
 * issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	for (n = 0; n < passes; n++) {
		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}

		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
403
/*
 * xor_8regs_p_5 - XOR four source buffers into a destination buffer,
 * issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 * @p5: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	      const unsigned long * __restrict p2,
	      const unsigned long * __restrict p3,
	      const unsigned long * __restrict p4,
	      const unsigned long * __restrict p5)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	for (n = 0; n < passes; n++) {
		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}

		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];

		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
443
/*
 * xor_32regs_p_2 - XOR one source buffer into a destination buffer via
 * eight temporaries, issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source word
 * @p2: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);

	for (n = 0; n < passes; n++) {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
		}

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	}
}
489
/*
 * xor_32regs_p_3 - XOR two source buffers into a destination buffer via
 * eight temporaries, issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	for (n = 0; n < passes; n++) {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
		}

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
547
/*
 * xor_32regs_p_4 - XOR three source buffers into a destination buffer
 * via eight temporaries, issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	for (n = 0; n < passes; n++) {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
		}

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
617
/*
 * xor_32regs_p_5 - XOR four source buffers into a destination buffer
 * via eight temporaries, issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process from every buffer
 * @p1: destination; each word becomes itself XOR the matching source words
 * @p2: read-only source
 * @p3: read-only source
 * @p4: read-only source
 * @p5: read-only source
 *
 * Data is handled in lines of eight longs; a trailing partial line is
 * not touched.  Every pass except the last calls prefetchw()/prefetch()
 * on the following line before working on the current one.  When there
 * is at most one line, that single pass still issues the prefetches, and
 * it always runs, even when @bytes is shorter than a line.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	/* Lines that follow the first one; <= 0 when there is only one. */
	long extra = bytes / (sizeof (long)) / 8 - 1;
	/* Passes that prefetch the next line: never fewer than one. */
	long ahead = extra > 0 ? extra : 1;
	/* With several lines, one closing pass runs without prefetching. */
	long passes = extra > 0 ? ahead + 1 : ahead;
	long n;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	for (n = 0; n < passes; n++) {
		/*
		 * The temporaries share the buffers' unsigned long type, so
		 * words with the top bit set never pass through a signed type.
		 */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		if (n < ahead) {
			prefetchw(p1 + 8);
			prefetch(p2 + 8);
			prefetch(p3 + 8);
			prefetch(p4 + 8);
			prefetch(p5 + 8);
		}

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
699
700 static struct xor_block_template xor_block_8regs = {
701 .name = "8regs",
702 .do_2 = xor_8regs_2,
703 .do_3 = xor_8regs_3,
704 .do_4 = xor_8regs_4,
705 .do_5 = xor_8regs_5,
706 };
707
708 static struct xor_block_template xor_block_32regs = {
709 .name = "32regs",
710 .do_2 = xor_32regs_2,
711 .do_3 = xor_32regs_3,
712 .do_4 = xor_32regs_4,
713 .do_5 = xor_32regs_5,
714 };
715
716 static struct xor_block_template xor_block_8regs_p __maybe_unused = {
717 .name = "8regs_prefetch",
718 .do_2 = xor_8regs_p_2,
719 .do_3 = xor_8regs_p_3,
720 .do_4 = xor_8regs_p_4,
721 .do_5 = xor_8regs_p_5,
722 };
723
724 static struct xor_block_template xor_block_32regs_p __maybe_unused = {
725 .name = "32regs_prefetch",
726 .do_2 = xor_32regs_p_2,
727 .do_3 = xor_32regs_p_3,
728 .do_4 = xor_32regs_p_4,
729 .do_5 = xor_32regs_p_5,
730 };
731
/*
 * Pass each generic template to xor_speed() in turn: 8regs,
 * 8regs_prefetch, 32regs, 32regs_prefetch.  xor_speed() is defined
 * elsewhere; presumably it times the template - confirm at the caller.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)
739