1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* -*- linux-c -*- ------------------------------------------------------- * 3 * 4 * Copyright 2002 H. Peter Anvin - All Rights Reserved 5 * 6 * ----------------------------------------------------------------------- */ 7 8 /* 9 * raid6/sse1.c 10 * 11 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions 12 * 13 * This is really an MMX implementation, but it requires SSE-1 or 14 * AMD MMXEXT for prefetch support and a few other features. The 15 * support for nontemporal memory accesses is enough to make this 16 * worthwhile as a separate implementation. 17 */ 18 19 #include <linux/raid/pq.h> 20 #include <asm/fpu/api.h> 21 22 /* Defined in raid6/mmx.c */ 23 extern const struct raid6_mmx_constants { 24 u64 x1d; 25 } raid6_mmx_constants; 26 27 static int raid6_have_sse1_or_mmxext(void) 28 { 29 /* Not really boot_cpu but "all_cpus" */ 30 return boot_cpu_has(X86_FEATURE_MMX) && 31 (boot_cpu_has(X86_FEATURE_XMM) || 32 boot_cpu_has(X86_FEATURE_MMXEXT)); 33 } 34 35 /* 36 * Plain SSE1 implementation 37 */ 38 static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) 39 { 40 u8 **dptr = (u8 **)ptrs; 41 u8 *p, *q; 42 int d, z, z0; 43 44 z0 = disks - 3; /* Highest data disk */ 45 p = dptr[z0+1]; /* XOR parity */ 46 q = dptr[z0+2]; /* RS syndrome */ 47 48 kernel_fpu_begin(); 49 50 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); 51 asm volatile("pxor %mm5,%mm5"); /* Zero temp */ 52 53 for ( d = 0 ; d < bytes ; d += 8 ) { 54 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 55 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ 56 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); 57 asm volatile("movq %mm2,%mm4"); /* Q[0] */ 58 asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); 59 for ( z = z0-2 ; z >= 0 ; z-- ) { 60 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 61 asm volatile("pcmpgtb %mm4,%mm5"); 62 asm volatile("paddb %mm4,%mm4"); 63 asm volatile("pand %mm0,%mm5"); 64 asm volatile("pxor %mm5,%mm4"); 65 asm volatile("pxor %mm5,%mm5"); 66 asm volatile("pxor %mm6,%mm2"); 67 asm volatile("pxor %mm6,%mm4"); 68 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); 69 } 70 asm volatile("pcmpgtb %mm4,%mm5"); 71 asm volatile("paddb %mm4,%mm4"); 72 asm volatile("pand %mm0,%mm5"); 73 asm volatile("pxor %mm5,%mm4"); 74 asm volatile("pxor %mm5,%mm5"); 75 asm volatile("pxor %mm6,%mm2"); 76 asm volatile("pxor %mm6,%mm4"); 77 78 asm volatile("movntq %%mm2,%0" : "=m" (p[d])); 79 asm volatile("movntq %%mm4,%0" : "=m" (q[d])); 80 } 81 82 asm volatile("sfence" : : : "memory"); 83 kernel_fpu_end(); 84 } 85 86 const struct raid6_calls raid6_sse1x1 = { 87 .gen_syndrome = raid6_sse11_gen_syndrome, 88 .valid = raid6_have_sse1_or_mmxext, 89 .name = "sse1x1", 90 .priority = 1, /* Has cache hints */ 91 }; 92 93 /* 94 * Unrolled-by-2 SSE1 implementation 95 */ 96 static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) 97 { 98 u8 **dptr = (u8 **)ptrs; 99 u8 *p, *q; 100 int d, z, z0; 101 102 z0 = disks - 3; /* Highest data disk */ 103 p = dptr[z0+1]; /* XOR parity */ 104 q = dptr[z0+2]; /* RS syndrome */ 105 106 kernel_fpu_begin(); 107 108 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); 109 asm volatile("pxor %mm5,%mm5"); /* Zero temp */ 110 asm volatile("pxor %mm7,%mm7"); /* Zero temp */ 111 112 /* We uniformly assume a single prefetch covers at least 16 bytes */ 113 for ( d = 0 ; d < bytes ; d += 16 ) { 114 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 115 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ 116 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ 117 asm volatile("movq %mm2,%mm4"); /* Q[0] */ 118 asm volatile("movq %mm3,%mm6"); /* Q[1] */ 119 for ( z = z0-1 ; z >= 0 ; z-- ) { 120 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 121 asm volatile("pcmpgtb %mm4,%mm5"); 122 asm volatile("pcmpgtb %mm6,%mm7"); 123 asm volatile("paddb %mm4,%mm4"); 124 asm volatile("paddb %mm6,%mm6"); 125 asm volatile("pand %mm0,%mm5"); 126 asm volatile("pand %mm0,%mm7"); 127 asm volatile("pxor %mm5,%mm4"); 128 asm volatile("pxor %mm7,%mm6"); 129 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); 130 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); 131 asm volatile("pxor %mm5,%mm2"); 132 asm volatile("pxor %mm7,%mm3"); 133 asm volatile("pxor %mm5,%mm4"); 134 asm volatile("pxor %mm7,%mm6"); 135 asm volatile("pxor %mm5,%mm5"); 136 asm volatile("pxor %mm7,%mm7"); 137 } 138 asm volatile("movntq %%mm2,%0" : "=m" (p[d])); 139 asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); 140 asm volatile("movntq %%mm4,%0" : "=m" (q[d])); 141 asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); 142 } 143 144 asm volatile("sfence" : :: "memory"); 145 kernel_fpu_end(); 146 } 147 148 const struct raid6_calls raid6_sse1x2 = { 149 .gen_syndrome = raid6_sse12_gen_syndrome, 150 .valid = raid6_have_sse1_or_mmxext, 151 .name = "sse1x2", 152 .priority = 1, /* Has cache hints */ 153 }; 154