xref: /linux/arch/loongarch/include/asm/xor.h (revision 03c11eb3b16dc0058589751dfd91f254be2be613)
1*75ded18aSWANG Xuerui /* SPDX-License-Identifier: GPL-2.0-or-later */
2*75ded18aSWANG Xuerui /*
3*75ded18aSWANG Xuerui  * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
4*75ded18aSWANG Xuerui  */
5*75ded18aSWANG Xuerui #ifndef _ASM_LOONGARCH_XOR_H
6*75ded18aSWANG Xuerui #define _ASM_LOONGARCH_XOR_H
7*75ded18aSWANG Xuerui 
8*75ded18aSWANG Xuerui #include <asm/cpu-features.h>
9*75ded18aSWANG Xuerui #include <asm/xor_simd.h>
10*75ded18aSWANG Xuerui 
11*75ded18aSWANG Xuerui #ifdef CONFIG_CPU_HAS_LSX
12*75ded18aSWANG Xuerui static struct xor_block_template xor_block_lsx = {
13*75ded18aSWANG Xuerui 	.name = "lsx",
14*75ded18aSWANG Xuerui 	.do_2 = xor_lsx_2,
15*75ded18aSWANG Xuerui 	.do_3 = xor_lsx_3,
16*75ded18aSWANG Xuerui 	.do_4 = xor_lsx_4,
17*75ded18aSWANG Xuerui 	.do_5 = xor_lsx_5,
18*75ded18aSWANG Xuerui };
19*75ded18aSWANG Xuerui 
/*
 * Hand the "lsx" template to xor_speed(), but only when cpu_has_lsx
 * evaluates true. Expands to exactly one statement.
 */
#define XOR_SPEED_LSX()						\
	do {							\
		if (cpu_has_lsx) {				\
			xor_speed(&xor_block_lsx);		\
		}						\
	} while (0)
25*75ded18aSWANG Xuerui #else /* CONFIG_CPU_HAS_LSX */
/* CONFIG_CPU_HAS_LSX is off: there is no LSX template, so this is a no-op. */
#define XOR_SPEED_LSX()		do { } while (0)
27*75ded18aSWANG Xuerui #endif /* CONFIG_CPU_HAS_LSX */
28*75ded18aSWANG Xuerui 
29*75ded18aSWANG Xuerui #ifdef CONFIG_CPU_HAS_LASX
30*75ded18aSWANG Xuerui static struct xor_block_template xor_block_lasx = {
31*75ded18aSWANG Xuerui 	.name = "lasx",
32*75ded18aSWANG Xuerui 	.do_2 = xor_lasx_2,
33*75ded18aSWANG Xuerui 	.do_3 = xor_lasx_3,
34*75ded18aSWANG Xuerui 	.do_4 = xor_lasx_4,
35*75ded18aSWANG Xuerui 	.do_5 = xor_lasx_5,
36*75ded18aSWANG Xuerui };
37*75ded18aSWANG Xuerui 
/*
 * Hand the "lasx" template to xor_speed(), but only when cpu_has_lasx
 * evaluates true. Expands to exactly one statement.
 */
#define XOR_SPEED_LASX()					\
	do {							\
		if (cpu_has_lasx) {				\
			xor_speed(&xor_block_lasx);		\
		}						\
	} while (0)
43*75ded18aSWANG Xuerui #else /* CONFIG_CPU_HAS_LASX */
/* CONFIG_CPU_HAS_LASX is off: there is no LASX template, so this is a no-op. */
#define XOR_SPEED_LASX()	do { } while (0)
45*75ded18aSWANG Xuerui #endif /* CONFIG_CPU_HAS_LASX */
46*75ded18aSWANG Xuerui 
/*
 * For grins, also test the generic routines.
 *
 * More importantly: it cannot be ruled out at this point in time that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, maybe for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
55*75ded18aSWANG Xuerui #include <asm-generic/xor.h>
56*75ded18aSWANG Xuerui 
/*
 * Discard any earlier XOR_TRY_TEMPLATES definition and install the
 * LoongArch candidate list. Each candidate is passed to xor_speed()
 * in the order below: the four register-based templates first, then the
 * LSX and LASX templates (each of which is a no-op when its CONFIG
 * option is off).
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		/* register-based templates */			\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_8regs_p);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_32regs_p);			\
		/* SIMD templates, if configured */		\
		XOR_SPEED_LSX();				\
		XOR_SPEED_LASX();				\
	} while (0)
67*75ded18aSWANG Xuerui 
68*75ded18aSWANG Xuerui #endif /* _ASM_LOONGARCH_XOR_H */
69