xref: /linux/arch/riscv/include/asm/runtime-const.h (revision a44fb5722199de8338d991db5ad3d509192179bb)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>
#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui	%[__ret],0x89abd\n\t"			\
		"addi	%[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
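
/*
 * The lui/addi pair above only materializes the placeholder value
 * 0x89abcdef (0x89abd000 - 0x211); __runtime_fixup_32() rewrites both
 * immediates with the real value of 'sym' when runtime_const_init() runs.
 * The ".long 1b - ." entry records the instruction's address, relative to
 * the section entry itself, so the patching code can find it later.
 */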
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into the single "add.uw" instruction.
 */
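/*
 * Worked example of the placeholder sequence: "lui %ret,0x89abd" followed
 * by "addiw %ret,%ret,-0x211" yields 0x89abd000 - 0x211 = 0x89abcdef in
 * the low word, and "lui %tmp,0x1234" + "addiw %tmp,%tmp,0x567" yields
 * 0x01234567. After the combining step (slli/srli/add, add.uw, or pack)
 * the register holds 0x0123456789abcdef, the dummy constant that
 * __runtime_fixup_ptr() later overwrites with the real value.
 */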
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui	%[__ret],0x89abd\n\t"				\
	"lui	%[__tmp],0x1234\n\t"				\
	"addiw	%[__ret],%[__ret],-0x211\n\t"			\
	"addiw	%[__tmp],%[__tmp],0x567\n\t"			\

#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"slli	%[__ret],%[__ret],32\n\t"			\
	"srli	%[__ret],%[__ret],32\n\t"			\
	"add	%[__ret],%[__ret],%[__tmp]\n\t"			\

#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	"pack	%[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

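/*
 * All three variants above are exactly four instructions (16 bytes): the
 * Zba and Zbkb sequences are nop-padded so that the alternatives
 * mechanism can patch them over the base sequence in place without
 * changing the code size.
 */
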
#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE_2(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBA)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1				\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#else
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		RISCV_RUNTIME_CONST_64_BASE				\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#endif
#endif

#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})
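
/*
 * As with runtime_const_ptr(), the shift amount of 12 above is only a
 * placeholder: __runtime_fixup_shift() patches the real count into the
 * shift immediate at boot. SRLI expands to the XLEN-appropriate right
 * shift (srli or srliw).
 */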

#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
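
/*
 * Usage sketch (illustrative only; the names below are made up and not
 * part of this header). A subsystem reads a not-quite-constant through
 * the runtime_const_*() helpers and, once the value is final, patches
 * every use site with runtime_const_init():
 *
 *	static struct bucket *table;	// hypothetical
 *	static unsigned int shift;	// hypothetical
 *
 *	static inline struct bucket *hash_bucket(u32 hash)
 *	{
 *		return runtime_const_ptr(table) +
 *		       runtime_const_shift_right_32(hash, shift);
 *	}
 *
 *	// in an __init function, once 'table' and 'shift' are final:
 *	runtime_const_init(ptr, table);
 *	runtime_const_init(shift, shift);
 *
 * The 'type' argument selects both the fixup routine
 * (__runtime_fixup_ptr or __runtime_fixup_shift) and the section name
 * (runtime_ptr_<sym> or runtime_shift_<sym>) emitted by the matching
 * accessor macro.
 */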

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in the upper 20 bits of the
 * instruction (bits 31:12), and addi holds the lower 12 bits of the
 * immediate in the upper 12 bits of the instruction (bits 31:20).
 */
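/*
 * Worked example: patching in val = 0x12345fff. The low 12 bits are
 * sign-extended, so lower_immediate = sign_extend32(0xfff, 11) = -1 and
 * upper_immediate = 0x12345fff - (-1) = 0x12346000. The lui immediate
 * becomes 0x12346 and the addi immediate becomes -1; at run time
 * "lui rd,0x12346" + "addi rd,rd,-1" reproduces exactly 0x12345fff. The
 * "val - lower_immediate" step below is what compensates for addi's sign
 * extension.
 */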
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits of addi (the immediate field) */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs1 to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}
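
/*
 * On 64-bit the four patched instructions sit at the offsets laid down by
 * RISCV_RUNTIME_CONST_64_PREAMBLE:
 *
 *	where + 0:  lui   %ret  (low word, upper 20 bits)
 *	where + 4:  lui   %tmp  (high word, upper 20 bits)
 *	where + 8:  addiw %ret  (low word, lower 12 bits)
 *	where + 12: addiw %tmp  (high word, lower 12 bits)
 *
 * which is why the two __runtime_fixup_32() calls above pair offsets
 * (0, 8) for the low half of 'val' and (4, 12) for the high half.
 */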

/*
 * Replace the least significant 5 bits of the srli/srliw immediate (the
 * shift amount), located at bits 20-24 of the instruction.
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}
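
/*
 * Worked example: patching a shift of 7 into "srli rd,rs,12". The mask
 * 0xfe0fffff clears the five shamt bits (insn[24:20]) while leaving the
 * rest of the encoding intact, and (7 & 0b11111) << 20 writes the new
 * amount, turning the instruction into "srli rd,rs,7".
 */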

static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}
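
/*
 * Each s32 in the runtime_ptr_<sym> and runtime_shift_<sym> sections was
 * emitted as ".long 1b - .", i.e. the distance from the section entry to
 * the instruction to patch. Adding the entry's own address back
 * ("*start + (void *)start") recovers the absolute address of that
 * instruction, so the recorded offsets remain valid wherever the kernel
 * image ends up.
 */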

#endif /* _ASM_RISCV_RUNTIME_CONST_H */