xref: /linux/arch/loongarch/lib/xor_simd.c (revision 03c11eb3b16dc0058589751dfd91f254be2be613)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
7*75ded18aSWANG Xuerui 
8*75ded18aSWANG Xuerui #include "xor_simd.h"
9*75ded18aSWANG Xuerui 
/*
 * Number of bytes handled per loop iteration: one cache line (64 bytes).
 * This assumes that future popular LoongArch cores will have performance
 * characteristics similar to those of the current models.
 */
#define LINE_WIDTH 64
16*75ded18aSWANG Xuerui 
17*75ded18aSWANG Xuerui #ifdef CONFIG_CPU_HAS_LSX
18*75ded18aSWANG Xuerui 
/*
 * LSX building blocks. Every macro below expands to a string literal holding
 * one or more lines of assembly text, each terminated by "\n\t", so that the
 * pieces can be concatenated into a single asm template string.
 *
 * LD(reg, base, offset)  - "vld $vr<reg>, %[<base>], <offset>"
 * ST(reg, base, offset)  - "vst $vr<reg>, %[<base>], <offset>"
 * XOR(dj, k)             - "vxor.v $vr<dj>, $vr<dj>, $vr<k>"; register dj is
 *                          both the destination and the first source, so the
 *                          result accumulates in dj.
 *
 * `reg`, `dj` and `k` are bare register numbers. `base` is spliced in as
 * "%[base]", i.e. it is expected to name an operand of the asm statement the
 * text ends up in (that statement is not part of this file).
 */
#define LD(reg, base, offset)	\
	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"

/*
 * Load the in/out line at `base` into $vr0-$vr3. The four offsets are
 * 16 bytes apart and together span one 64-byte line.
 */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 16)		\
	LD(2, base, 32)		\
	LD(3, base, 48)

/*
 * Load another line at `base` into the scratch registers $vr4-$vr7, then XOR
 * each of them into the matching accumulator register $vr0-$vr3.
 *
 * NOTE(review): $vr0-$vr7 are hard-coded here; whoever emits the asm
 * statement has to account for them being overwritten - confirm in
 * xor_template.c.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(4, base, 0)		\
	LD(5, base, 16)		\
	LD(6, base, 32)		\
	LD(7, base, 48)		\
	XOR(0, 4)		\
	XOR(1, 5)		\
	XOR(2, 6)		\
	XOR(3, 7)

/* Store the accumulator registers $vr0-$vr3 back to the line at `base`. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 16)		\
	ST(2, base, 32)		\
	ST(3, base, 48)
46*75ded18aSWANG Xuerui 
/*
 * XOR_FUNC_NAME(nr) pastes `nr` onto the LSX prefix, giving __xor_lsx_<nr>.
 * Presumably consumed by xor_template.c (included next) to name what it
 * defines - confirm against the template.
 */
#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
48*75ded18aSWANG Xuerui #include "xor_template.c"
49*75ded18aSWANG Xuerui 
50*75ded18aSWANG Xuerui #undef LD
51*75ded18aSWANG Xuerui #undef ST
52*75ded18aSWANG Xuerui #undef XOR
53*75ded18aSWANG Xuerui #undef LD_INOUT_LINE
54*75ded18aSWANG Xuerui #undef LD_AND_XOR_LINE
55*75ded18aSWANG Xuerui #undef ST_LINE
56*75ded18aSWANG Xuerui #undef XOR_FUNC_NAME
57*75ded18aSWANG Xuerui 
58*75ded18aSWANG Xuerui #endif /* CONFIG_CPU_HAS_LSX */
59*75ded18aSWANG Xuerui 
60*75ded18aSWANG Xuerui #ifdef CONFIG_CPU_HAS_LASX
61*75ded18aSWANG Xuerui 
/*
 * LASX building blocks. Every macro below expands to a string literal holding
 * one or more lines of assembly text, each terminated by "\n\t", so that the
 * pieces can be concatenated into a single asm template string.
 *
 * LD(reg, base, offset)  - "xvld $xr<reg>, %[<base>], <offset>"
 * ST(reg, base, offset)  - "xvst $xr<reg>, %[<base>], <offset>"
 * XOR(dj, k)             - "xvxor.v $xr<dj>, $xr<dj>, $xr<k>"; register dj is
 *                          both the destination and the first source, so the
 *                          result accumulates in dj.
 *
 * `reg`, `dj` and `k` are bare register numbers. `base` is spliced in as
 * "%[base]", i.e. it is expected to name an operand of the asm statement the
 * text ends up in (that statement is not part of this file).
 */
#define LD(reg, base, offset)	\
	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"

/*
 * Load the in/out line at `base` into $xr0-$xr1. The two offsets are
 * 32 bytes apart and together span one 64-byte line.
 */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 32)

/*
 * Load another line at `base` into the scratch registers $xr2-$xr3, then XOR
 * each of them into the matching accumulator register $xr0-$xr1.
 *
 * NOTE(review): $xr0-$xr3 are hard-coded here; whoever emits the asm
 * statement has to account for them being overwritten - confirm in
 * xor_template.c.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(2, base, 0)		\
	LD(3, base, 32)		\
	XOR(0, 2)		\
	XOR(1, 3)

/* Store the accumulator registers $xr0-$xr1 back to the line at `base`. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 32)
81*75ded18aSWANG Xuerui 
/*
 * XOR_FUNC_NAME(nr) pastes `nr` onto the LASX prefix, giving __xor_lasx_<nr>.
 * Presumably consumed by xor_template.c (included next) to name what it
 * defines - confirm against the template.
 */
#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
83*75ded18aSWANG Xuerui #include "xor_template.c"
84*75ded18aSWANG Xuerui 
85*75ded18aSWANG Xuerui #undef LD
86*75ded18aSWANG Xuerui #undef ST
87*75ded18aSWANG Xuerui #undef XOR
88*75ded18aSWANG Xuerui #undef LD_INOUT_LINE
89*75ded18aSWANG Xuerui #undef LD_AND_XOR_LINE
90*75ded18aSWANG Xuerui #undef ST_LINE
91*75ded18aSWANG Xuerui #undef XOR_FUNC_NAME
92*75ded18aSWANG Xuerui 
93*75ded18aSWANG Xuerui #endif /* CONFIG_CPU_HAS_LASX */
94