1 // SPDX-License-Identifier: GPL-2.0-or-later 2 #include <linux/prefetch.h> 3 #include "xor_impl.h" 4 5 static void 6 xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1, 7 const unsigned long * __restrict p2) 8 { 9 long lines = bytes / (sizeof (long)) / 8 - 1; 10 prefetchw(p1); 11 prefetch(p2); 12 13 do { 14 prefetchw(p1+8); 15 prefetch(p2+8); 16 once_more: 17 p1[0] ^= p2[0]; 18 p1[1] ^= p2[1]; 19 p1[2] ^= p2[2]; 20 p1[3] ^= p2[3]; 21 p1[4] ^= p2[4]; 22 p1[5] ^= p2[5]; 23 p1[6] ^= p2[6]; 24 p1[7] ^= p2[7]; 25 p1 += 8; 26 p2 += 8; 27 } while (--lines > 0); 28 if (lines == 0) 29 goto once_more; 30 } 31 32 static void 33 xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1, 34 const unsigned long * __restrict p2, 35 const unsigned long * __restrict p3) 36 { 37 long lines = bytes / (sizeof (long)) / 8 - 1; 38 prefetchw(p1); 39 prefetch(p2); 40 prefetch(p3); 41 42 do { 43 prefetchw(p1+8); 44 prefetch(p2+8); 45 prefetch(p3+8); 46 once_more: 47 p1[0] ^= p2[0] ^ p3[0]; 48 p1[1] ^= p2[1] ^ p3[1]; 49 p1[2] ^= p2[2] ^ p3[2]; 50 p1[3] ^= p2[3] ^ p3[3]; 51 p1[4] ^= p2[4] ^ p3[4]; 52 p1[5] ^= p2[5] ^ p3[5]; 53 p1[6] ^= p2[6] ^ p3[6]; 54 p1[7] ^= p2[7] ^ p3[7]; 55 p1 += 8; 56 p2 += 8; 57 p3 += 8; 58 } while (--lines > 0); 59 if (lines == 0) 60 goto once_more; 61 } 62 63 static void 64 xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1, 65 const unsigned long * __restrict p2, 66 const unsigned long * __restrict p3, 67 const unsigned long * __restrict p4) 68 { 69 long lines = bytes / (sizeof (long)) / 8 - 1; 70 71 prefetchw(p1); 72 prefetch(p2); 73 prefetch(p3); 74 prefetch(p4); 75 76 do { 77 prefetchw(p1+8); 78 prefetch(p2+8); 79 prefetch(p3+8); 80 prefetch(p4+8); 81 once_more: 82 p1[0] ^= p2[0] ^ p3[0] ^ p4[0]; 83 p1[1] ^= p2[1] ^ p3[1] ^ p4[1]; 84 p1[2] ^= p2[2] ^ p3[2] ^ p4[2]; 85 p1[3] ^= p2[3] ^ p3[3] ^ p4[3]; 86 p1[4] ^= p2[4] ^ p3[4] ^ p4[4]; 87 p1[5] ^= p2[5] ^ p3[5] ^ p4[5]; 88 p1[6] ^= p2[6] ^ p3[6] ^ p4[6]; 89 p1[7] ^= p2[7] ^ p3[7] ^ p4[7]; 90 p1 += 8; 91 p2 += 8; 92 p3 += 8; 93 p4 += 8; 94 } while (--lines > 0); 95 if (lines == 0) 96 goto once_more; 97 } 98 99 static void 100 xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1, 101 const unsigned long * __restrict p2, 102 const unsigned long * __restrict p3, 103 const unsigned long * __restrict p4, 104 const unsigned long * __restrict p5) 105 { 106 long lines = bytes / (sizeof (long)) / 8 - 1; 107 108 prefetchw(p1); 109 prefetch(p2); 110 prefetch(p3); 111 prefetch(p4); 112 prefetch(p5); 113 114 do { 115 prefetchw(p1+8); 116 prefetch(p2+8); 117 prefetch(p3+8); 118 prefetch(p4+8); 119 prefetch(p5+8); 120 once_more: 121 p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0]; 122 p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1]; 123 p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2]; 124 p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3]; 125 p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4]; 126 p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5]; 127 p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6]; 128 p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7]; 129 p1 += 8; 130 p2 += 8; 131 p3 += 8; 132 p4 += 8; 133 p5 += 8; 134 } while (--lines > 0); 135 if (lines == 0) 136 goto once_more; 137 } 138 139 140 DO_XOR_BLOCKS(8regs_p, xor_8regs_p_2, xor_8regs_p_3, xor_8regs_p_4, 141 xor_8regs_p_5); 142 143 struct xor_block_template xor_block_8regs_p = { 144 .name = "8regs_prefetch", 145 .xor_gen = xor_gen_8regs_p, 146 }; 147