/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui	%[__ret],0x89abd\n\t"			\
		"addi	%[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
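
/*
 * Note that the lui/addi pair above only encodes the placeholder value
 * 0x89abd000 - 0x211 == 0x89abcdef; the real value of 'sym' is patched in
 * by __runtime_fixup_ptr() below once runtime_const_init() has run.
 */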
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 */
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into one instruction "add.uw".
 */
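
/*
 * Worked example with the placeholder values below (the real constant is
 * patched in later by __runtime_fixup_ptr()):
 *
 *	lui ret,0x89abd; addiw ret,ret,-0x211	-> ret = 0x89abcdef (low word)
 *	lui tmp,0x1234;  addiw tmp,tmp,0x567	-> tmp = 0x01234567 (high word)
 *
 * The base sequence shifts tmp left by 32, zero-extends ret with slli/srli
 * and adds the halves, giving 0x0123456789abcdef. "pack ret,ret,tmp" (Zbkb)
 * or "slli tmp,tmp,32; add.uw ret,ret,tmp" (Zba) produce the same result in
 * fewer instructions.
 */
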
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui	%[__ret],0x89abd\n\t"				\
	"lui	%[__tmp],0x1234\n\t"				\
	"addiw	%[__ret],%[__ret],-0x211\n\t"			\
	"addiw	%[__tmp],%[__tmp],0x567\n\t"			\

#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"slli	%[__ret],%[__ret],32\n\t"			\
	"srli	%[__ret],%[__ret],32\n\t"			\
	"add	%[__ret],%[__ret],%[__tmp]\n\t"			\

#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	".option norvc\n\t"					\
	"slli	%[__tmp],%[__tmp],32\n\t"			\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	".option norvc\n\t"					\
	"pack	%[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)	\
	&& defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE_2(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
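
/*
 * The ALTERNATIVE_2()/ALTERNATIVE() wrappers pick the Zba or Zbkb sequence
 * at boot when the corresponding extension is available. The nops in
 * RISCV_RUNTIME_CONST_64_ZBA/_ZBKB pad those variants to the same length as
 * RISCV_RUNTIME_CONST_64_BASE (four uncompressed instructions, since norvc
 * is in effect), so they can be patched in place.
 */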
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1				\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#else
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		RISCV_RUNTIME_CONST_64_BASE				\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#endif
#endif

#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})

#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
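
/*
 * Typical usage (a sketch modelled on the generic runtime-const user in
 * fs/dcache.c; the symbol names are that user's, not part of this header):
 *
 *	static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
 *	{
 *		return runtime_const_ptr(dentry_hashtable) +
 *			runtime_const_shift_right_32(hashlen, d_hash_shift);
 *	}
 *
 * and, once the final values are known during boot:
 *
 *	runtime_const_init(ptr, dentry_hashtable);
 *	runtime_const_init(shift, d_hash_shift);
 *
 * The RUNTIME_CONST() helper in include/asm-generic/vmlinux.lds.h emits the
 * __start_runtime_* and __stop_runtime_* section bounds used above.
 */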

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in instruction bits [31:12],
 * addi holds the lower 12 bits of the immediate in instruction bits [31:20].
 */
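
/*
 * For example, patching in val = 0x89abcdef (the low word of the placeholder
 * emitted by runtime_const_ptr()) gives:
 *
 *	lower_immediate = sign_extend32(0x89abcdef, 11) = -0x211
 *	upper_immediate = 0x89abcdef - (-0x211)         = 0x89abd000
 *
 * so the lui immediate field becomes 0x89abd and the addi immediate field
 * becomes -0x211, which undoes the rounding up done by lui.
 */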
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits of addi */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}
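
/*
 * Note on the layout patched above: on 64-bit, RISCV_RUNTIME_CONST_64_PREAMBLE
 * emits, starting at the recorded "1:" label,
 *
 *	+0: lui ret,  +4: lui tmp,  +8: addiw ret,  +12: addiw tmp
 *
 * so the low 32 bits of val are patched into the (+0, +8) parcel pair and the
 * high 32 bits into the (+4, +12) pair. On 32-bit there is only lui at +0 and
 * addi at +4.
 */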

/*
 * Replace the least significant 5 bits of the srli/srliw immediate that is
 * located at bits 20-24
 */
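
/*
 * For example, patching in val = 18 turns the placeholder
 * "srli %[__ret],%[__val],12" emitted by runtime_const_shift_right_32() into
 * a shift by 18. Only shift amounts 0-31 can be patched here.
 */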
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

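/*
 * Each runtime_ptr_<sym>/runtime_shift_<sym> section entry is a 32-bit
 * self-relative offset (".long 1b - ."), so adding the entry's own address
 * back recovers the address of the instructions to patch.
 */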
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */