xref: /linux/lib/raid6/recov_rvv.c (revision 119b1e61a769aa98e68599f44721661a4d8c55f3)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 */
6*6093faafSChunyan Zhang 
7*6093faafSChunyan Zhang #include <asm/simd.h>
8*6093faafSChunyan Zhang #include <asm/vector.h>
9*6093faafSChunyan Zhang #include <crypto/internal/simd.h>
10*6093faafSChunyan Zhang #include <linux/raid/pq.h>
11*6093faafSChunyan Zhang 
/*
 * Availability callback for the RVV recovery routines.
 *
 * Returns the result of has_vector() converted to int.
 */
static int rvv_has_vector(void)
{
	int available = has_vector();

	return available;
}
16*6093faafSChunyan Zhang 
/*
 * Vector core of the two-data-block recovery.
 *
 * @bytes: number of bytes to process; consumed in steps of 16, and the loop
 *         only stops when the count reaches exactly zero, so it must be a
 *         non-negative multiple of 16
 * @p:     first input buffer (read only here)
 * @q:     second input buffer (read only here)
 * @dp:    read as input and overwritten with db ^ px
 * @dq:    read as input and overwritten with db
 * @pbmul: lookup table; 16 bytes at pbmul[0] are indexed by the low nibble
 *         and 16 bytes at pbmul[16] by the high nibble of px
 * @qmul:  lookup table with the same low/high nibble layout, indexed by
 *         *q ^ *dq
 *
 * The asm uses vector registers directly and does not list them as clobbers;
 * the callers in this file bracket the call with kernel_vector_begin() /
 * kernel_vector_end().
 */
static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp,
				    u8 *dq, const u8 *pbmul,
				    const u8 *qmul)
{
	/* Request a vector length of 16 elements of 8 bits (e8, m1). */
	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	x0, %[avl], e8, m1, ta, ma\n"
		      ".option	pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * Scalar equivalent of the loop below:
	 *
	 * while ( bytes-- ) {
	 *	uint8_t px, qx, db;
	 *
	 *	px	  = *p ^ *dp;
	 *	qx	  = qmul[*q ^ *dq];
	 *	*dq++ = db = pbmul[px] ^ qx;
	 *	*dp++ = db ^ px;
	 *	p++; q++;
	 * }
	 */
	while (bytes) {
		/*
		 * v0:px, v1:dp,
		 * v2:qx, v3:dq,
		 * v4:vx, v5:vy,
		 * v6:qm0, v7:qm1,
		 * v8:pm0, v9:pm1,
		 * v14:p/qm[vx], v15:p/qm[vy]
		 *
		 * vx holds the low nibbles and vy the high nibbles of the
		 * value being looked up.
		 *
		 * The buffers are only reachable through pointer operands
		 * held in registers, so the compiler cannot see the loads
		 * and stores performed here.  The "memory" clobber tells it
		 * that memory is read and written by this statement.
		 */
		asm volatile (".option		push\n"
			      ".option		arch,+v\n"
			      "vle8.v		v0, (%[px])\n"
			      "vle8.v		v1, (%[dp])\n"
			      "vxor.vv		v0, v0, v1\n" /* v0 = px = *p ^ *dp */
			      "vle8.v		v2, (%[qx])\n"
			      "vle8.v		v3, (%[dq])\n"
			      "vxor.vv		v4, v2, v3\n" /* v4 = *q ^ *dq */
			      "vsrl.vi		v5, v4, 4\n"
			      "vand.vi		v4, v4, 0xf\n"
			      "vle8.v		v6, (%[qm0])\n"
			      "vle8.v		v7, (%[qm1])\n"
			      "vrgather.vv	v14, v6, v4\n" /* v14 = qm[vx] */
			      "vrgather.vv	v15, v7, v5\n" /* v15 = qm[vy] */
			      "vxor.vv		v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */

			      "vsrl.vi		v5, v0, 4\n"
			      "vand.vi		v4, v0, 0xf\n"
			      "vle8.v		v8, (%[pm0])\n"
			      "vle8.v		v9, (%[pm1])\n"
			      "vrgather.vv	v14, v8, v4\n" /* v14 = pm[vx] */
			      "vrgather.vv	v15, v9, v5\n" /* v15 = pm[vy] */
			      "vxor.vv		v4, v14, v15\n" /* v4 = pbmul[px] */
			      "vxor.vv		v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */
			      "vxor.vv		v1, v3, v0\n" /* v1 = db ^ px; */
			      "vse8.v		v3, (%[dq])\n"
			      "vse8.v		v1, (%[dp])\n"
			      ".option		pop\n"
			      : :
			      [px]"r"(p),
			      [dp]"r"(dp),
			      [qx]"r"(q),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16),
			      [pm0]"r"(pbmul),
			      [pm1]"r"(pbmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
	}
}
95*6093faafSChunyan Zhang 
/*
 * Vector core of the data + P recovery.
 *
 * @bytes: number of bytes to process; consumed in steps of 16, and the loop
 *         only stops when the count reaches exactly zero, so it must be a
 *         non-negative multiple of 16
 * @p:     read as input and overwritten with *p ^ qmul[*q ^ *dq]
 * @q:     input buffer (read only here)
 * @dq:    read as input and overwritten with qmul[*q ^ *dq]
 * @qmul:  lookup table; 16 bytes at qmul[0] are indexed by the low nibble
 *         and 16 bytes at qmul[16] by the high nibble of *q ^ *dq
 *
 * The asm uses vector registers directly and does not list them as clobbers;
 * the caller in this file brackets the call with kernel_vector_begin() /
 * kernel_vector_end().
 */
static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q,
				    u8 *dq, const u8 *qmul)
{
	/* Request a vector length of 16 elements of 8 bits (e8, m1). */
	asm volatile (".option	push\n"
		      ".option	arch,+v\n"
		      "vsetvli	x0, %[avl], e8, m1, ta, ma\n"
		      ".option	pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * Scalar equivalent of the loop below:
	 *
	 * while (bytes--) {
	 *  *p++ ^= *dq = qmul[*q ^ *dq];
	 *  q++; dq++;
	 * }
	 */
	while (bytes) {
		/*
		 * v0:vx, v1:vy,
		 * v2:dq, v3:p,
		 * v4:qm0, v5:qm1,
		 * v10:m[vx], v11:m[vy]
		 *
		 * vx holds the low nibbles and vy the high nibbles of
		 * *q ^ *dq.  v0 and v1 are reused afterwards for the looked-up
		 * value and for the loaded/updated p data.
		 *
		 * The buffers are only reachable through pointer operands
		 * held in registers, so the compiler cannot see the loads
		 * and stores performed here.  The "memory" clobber tells it
		 * that memory is read and written by this statement.
		 */
		asm volatile (".option		push\n"
			      ".option		arch,+v\n"
			      "vle8.v		v0, (%[vx])\n"
			      "vle8.v		v2, (%[dq])\n"
			      "vxor.vv		v0, v0, v2\n" /* v0 = *q ^ *dq */
			      "vsrl.vi		v1, v0, 4\n"
			      "vand.vi		v0, v0, 0xf\n"
			      "vle8.v		v4, (%[qm0])\n"
			      "vle8.v		v5, (%[qm1])\n"
			      "vrgather.vv	v10, v4, v0\n" /* v10 = qm[vx] */
			      "vrgather.vv	v11, v5, v1\n" /* v11 = qm[vy] */
			      "vxor.vv		v0, v10, v11\n" /* v0 = qmul[*q ^ *dq] */
			      "vle8.v		v1, (%[vy])\n"
			      "vxor.vv		v1, v0, v1\n" /* v1 = *p ^ v0 */
			      "vse8.v		v0, (%[dq])\n"
			      "vse8.v		v1, (%[vy])\n"
			      ".option		pop\n"
			      : :
			      [vx]"r"(q),
			      [vy]"r"(p),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
	}
}
151*6093faafSChunyan Zhang 
/*
 * Two-data-block recovery entry point (the .data2 callback of
 * raid6_recov_rvv).
 *
 * @disks: number of entries in @ptrs; the last two are the P and Q buffers
 * @bytes: size of each buffer in bytes
 * @faila: index in @ptrs of the first block to recover
 * @failb: index in @ptrs of the second block to recover
 * @ptrs:  buffer pointer table; modified temporarily and restored before
 *         the vector routine runs
 */
static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila,
				  int failb, void **ptrs)
{
	const int p_slot = disks - 2;
	const int q_slot = disks - 1;
	u8 *p = ptrs[p_slot];
	u8 *q = ptrs[q_slot];
	u8 *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	/*
	 * Regenerate the syndrome with the zero page standing in for the two
	 * missing data pages.  The dead data pages take the P and Q slots, so
	 * they end up holding delta p and delta q.
	 */
	dp = ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[p_slot] = dp;

	dq = ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[q_slot] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Put the pointer table back the way the caller handed it over */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[p_slot] = p;
	ptrs[q_slot] = q;

	/* Select the multiplier tables for this pair of failed indices */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
					raid6_gfexp[failb]]];

	/* The vector registers may only be touched inside this bracket */
	kernel_vector_begin();
	__raid6_2data_recov_rvv(bytes, p, q, dp, dq, pbmul, qmul);
	kernel_vector_end();
}
191*6093faafSChunyan Zhang 
/*
 * Data + P recovery entry point (the .datap callback of raid6_recov_rvv).
 *
 * @disks: number of entries in @ptrs; the last two are the P and Q buffers
 * @bytes: size of each buffer in bytes
 * @faila: index in @ptrs of the data block to recover
 * @ptrs:  buffer pointer table; modified temporarily and restored before
 *         the vector routine runs
 */
static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila,
				  void **ptrs)
{
	const int q_slot = disks - 1;
	u8 *p = ptrs[disks - 2];
	u8 *q = ptrs[q_slot];
	u8 *dq;
	const u8 *qmul;		/* Q multiplier table */

	/*
	 * Regenerate the syndrome with the zero page standing in for the
	 * missing data page.  The dead data page takes the Q slot, so it ends
	 * up holding delta q.
	 */
	dq = ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[q_slot] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Put the pointer table back the way the caller handed it over */
	ptrs[faila] = dq;
	ptrs[q_slot] = q;

	/* Select the multiplier table for the failed index */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* The vector registers may only be touched inside this bracket */
	kernel_vector_begin();
	__raid6_datap_recov_rvv(bytes, p, q, dq, qmul);
	kernel_vector_end();
}
222*6093faafSChunyan Zhang 
223*6093faafSChunyan Zhang const struct raid6_recov_calls raid6_recov_rvv = {
224*6093faafSChunyan Zhang 	.data2		= raid6_2data_recov_rvv,
225*6093faafSChunyan Zhang 	.datap		= raid6_datap_recov_rvv,
226*6093faafSChunyan Zhang 	.valid		= rvv_has_vector,
227*6093faafSChunyan Zhang 	.name		= "rvv",
228*6093faafSChunyan Zhang 	.priority	= 1,
229*6093faafSChunyan Zhang };
230