xref: /linux/arch/riscv/include/asm/runtime-const.h (revision 4a1d8ababde685a77fd4fd61e58f973cbdf29f8c)
1a44fb572SCharlie Jenkins /* SPDX-License-Identifier: GPL-2.0 */
2a44fb572SCharlie Jenkins #ifndef _ASM_RISCV_RUNTIME_CONST_H
3a44fb572SCharlie Jenkins #define _ASM_RISCV_RUNTIME_CONST_H
4a44fb572SCharlie Jenkins 
5a44fb572SCharlie Jenkins #include <asm/asm.h>
6a44fb572SCharlie Jenkins #include <asm/alternative.h>
7a44fb572SCharlie Jenkins #include <asm/cacheflush.h>
8a44fb572SCharlie Jenkins #include <asm/insn-def.h>
9a44fb572SCharlie Jenkins #include <linux/memory.h>
10a44fb572SCharlie Jenkins #include <asm/text-patching.h>
11a44fb572SCharlie Jenkins 
12a44fb572SCharlie Jenkins #include <linux/uaccess.h>
13a44fb572SCharlie Jenkins 
14a44fb572SCharlie Jenkins #ifdef CONFIG_32BIT
/*
 * Emit the 32-bit placeholder constant 0x89abcdef via a lui/addi pair
 * (lui 0x89abd000 + addi -0x211 = 0x89abcdef) and record the location of
 * label 1: as a self-relative offset in section "runtime_ptr_<sym>", so
 * runtime_const_init() can patch in the real value at boot.
 * .option norvc forces full-size 4-byte instructions so the patch offsets
 * are fixed.
 */
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui	%[__ret],0x89abd\n\t"			\
		"addi	%[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
30a44fb572SCharlie Jenkins #else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into one instruction "add.uw".
 */

/*
 * Emit the two placeholder halves of the 64-bit constant 0x0123456789abcdef:
 * __ret gets the low 32 bits (lui 0x89abd + addiw -0x211 = 0x89abcdef) and
 * __tmp gets the high 32 bits (lui 0x1234 + addiw 0x567 = 0x01234567).
 * Label 1: marks the first patched instruction; .option norvc guarantees
 * four fixed-size 4-byte instructions.
 */
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui	%[__ret],0x89abd\n\t"				\
	"lui	%[__tmp],0x1234\n\t"				\
	"addiw	%[__ret],%[__ret],-0x211\n\t"			\
	"addiw	%[__tmp],%[__tmp],0x567\n\t"			\

/*
 * Generic combine: zero-extend the low half in __ret (slli then srli by 32)
 * and add the high half shifted into the upper 32 bits.  Exactly four
 * 4-byte instructions, so the Zba/Zbkb alternatives below can be patched
 * over it in place.
 */
#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"slli	%[__ret],%[__ret],32\n\t"			\
	"srli	%[__ret],%[__ret],32\n\t"			\
	"add	%[__ret],%[__ret],%[__tmp]\n\t"			\

/*
 * Zba combine: add.uw zero-extends __ret's low 32 bits while adding the
 * shifted high half, saving one instruction over the base sequence.  Two
 * nops pad this alternative to the same four-instruction length as the
 * RISCV_RUNTIME_CONST_64_BASE code it replaces.
 */
#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	".option norvc\n\t"					\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

/*
 * Zbkb combine: pack concatenates the low 32 bits of __ret (low half) and
 * __tmp (high half) into a single register in one instruction.  Three nops
 * pad this alternative to the base sequence's four-instruction length.
 */
#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	".option norvc\n\t"					\
	"pack	%[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

/*
 * Close the .option push opened by the preamble and record label 1: as a
 * self-relative offset in section "runtime_ptr_<sym>" so runtime_const_init()
 * can find and patch the placeholder sequence at boot.
 */
#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

828a2f20acSAlexandre Ghiti #if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)	\
838a2f20acSAlexandre Ghiti 	&& defined(CONFIG_RISCV_ISA_ZBKB)
/*
 * Both Zba (with toolchain support) and Zbkb are enabled in the build:
 * emit the generic combine and let the alternatives framework patch in the
 * Zba (add.uw) or Zbkb (pack) sequence at boot when the CPU has the
 * extension.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE_2(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
998a2f20acSAlexandre Ghiti #elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
/*
 * Only Zba is enabled in the build: generic combine by default, patched to
 * the add.uw sequence at boot when the CPU supports Zba.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1				\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
113a44fb572SCharlie Jenkins #elif defined(CONFIG_RISCV_ISA_ZBKB)
/*
 * Only Zbkb is enabled in the build: generic combine by default, patched to
 * the pack sequence at boot when the CPU supports Zbkb.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
127a44fb572SCharlie Jenkins #else
/*
 * Neither Zba nor Zbkb is available at build time: always use the generic
 * four-instruction combine, no boot-time alternative patching.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		RISCV_RUNTIME_CONST_64_BASE				\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
137a44fb572SCharlie Jenkins #endif
138a44fb572SCharlie Jenkins #endif
139a44fb572SCharlie Jenkins 
/*
 * Emit a srli/srliw of val with a placeholder shift amount of 12 and record
 * the instruction's location (self-relative) in section "runtime_shift_<sym>";
 * the real shift amount is patched in by __runtime_fixup_shift() at boot.
 */
#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})
155a44fb572SCharlie Jenkins 
/*
 * Patch every recorded use of runtime constant <sym> to its real value.
 * @type is "ptr" or "shift"; the linker-provided __start/__stop symbols
 * delimit the table of self-relative offsets accumulated in the
 * runtime_<type>_<sym> sections by the macros above.
 */
#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
165a44fb572SCharlie Jenkins 
__runtime_fixup_caches(void * where,unsigned int insns)166a44fb572SCharlie Jenkins static inline void __runtime_fixup_caches(void *where, unsigned int insns)
167a44fb572SCharlie Jenkins {
168a44fb572SCharlie Jenkins 	/* On riscv there are currently only cache-wide flushes so va is ignored. */
169a44fb572SCharlie Jenkins 	__always_unused uintptr_t va = (uintptr_t)where;
170a44fb572SCharlie Jenkins 
171a44fb572SCharlie Jenkins 	flush_icache_range(va, va + 4 * insns);
172a44fb572SCharlie Jenkins }
173a44fb572SCharlie Jenkins 
/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in the top 20 bits of the instruction.
 * addi holds the lower 12 bits of the immediate in the top 12 bits of the instruction.
 *
 * Because addi sign-extends its 12-bit immediate, the value placed in lui
 * must compensate for that sign extension.  Either instruction is replaced
 * with a nop when its half of the immediate is not needed.
 */
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out upper 12 bit of addi */
	addi_insn_mask = 0x000fffff;

	/*
	 * Instructions are only guaranteed 16-bit alignment, so reassemble
	 * each 32-bit instruction from its two little-endian 16-bit parcels.
	 */
	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	/* Split val so that upper_immediate + sign_extend(lower_immediate) == val. */
	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	/*
	 * NOTE(review): addi is written before lui; preserve this ordering —
	 * it likely matters for concurrent instruction fetch of a partially
	 * patched pair.  Confirm against the commit history before reordering.
	 */
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}
225a44fb572SCharlie Jenkins 
/*
 * Resolve the runtime_const_ptr() placeholder sequence at @where to the
 * real pointer value @val, then flush the icache over the patched range.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	/* Emitted layout: lui at +0, addi at +4. */
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	/*
	 * Emitted layout: lui(ret) +0, lui(tmp) +4, addiw(ret) +8,
	 * addiw(tmp) +12.  The ret pair takes the low 32 bits of @val,
	 * the tmp pair the high 32 bits.
	 */
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}
237a44fb572SCharlie Jenkins 
238a44fb572SCharlie Jenkins /*
239a44fb572SCharlie Jenkins  * Replace the least significant 5 bits of the srli/srliw immediate that is
240a44fb572SCharlie Jenkins  * located at bits 20-24
241a44fb572SCharlie Jenkins  */
__runtime_fixup_shift(void * where,unsigned long val)242a44fb572SCharlie Jenkins static inline void __runtime_fixup_shift(void *where, unsigned long val)
243a44fb572SCharlie Jenkins {
244a44fb572SCharlie Jenkins 	__le16 *parcel = where;
245a44fb572SCharlie Jenkins 	__le32 res;
246a44fb572SCharlie Jenkins 	u32 insn;
247a44fb572SCharlie Jenkins 
248a44fb572SCharlie Jenkins 	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
249a44fb572SCharlie Jenkins 
250a44fb572SCharlie Jenkins 	insn &= 0xfe0fffff;
251a44fb572SCharlie Jenkins 	insn |= (val & 0b11111) << 20;
252a44fb572SCharlie Jenkins 
253a44fb572SCharlie Jenkins 	res = cpu_to_le32(insn);
254a44fb572SCharlie Jenkins 	mutex_lock(&text_mutex);
255a44fb572SCharlie Jenkins 	patch_text_nosync(where, &res, sizeof(insn));
256a44fb572SCharlie Jenkins 	mutex_unlock(&text_mutex);
257a44fb572SCharlie Jenkins }
258a44fb572SCharlie Jenkins 
/*
 * Walk the table of self-relative code offsets in [@start, @end) and invoke
 * @fn on each recorded location with the constant @val.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
267a44fb572SCharlie Jenkins 
268a44fb572SCharlie Jenkins #endif /* _ASM_RISCV_RUNTIME_CONST_H */
269