/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#ifdef MODULE
#error "Cannot use runtime-const infrastructure from modules"
#endif

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

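/*
 * The asm sequences below plant recognizable placeholder immediates, e.g.
 * "lui 0x89abd; addi -0x211", which materializes 0x89abd000 - 0x211 =
 * 0x89abcdef. runtime_const_init() later rewrites those placeholders with
 * the real value once it is known at boot.
 */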
#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui %[__ret],0x89abd\n\t"			\
		"addi %[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into the single "add.uw" instruction.
 */
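/*
 * For illustration, with the placeholder constant 0x0123456789abcdef the
 * preamble below leaves
 *
 *	__ret = 0xffffffff89abcdef	(lui 0x89abd; addiw -0x211, sign-extended)
 *	__tmp = 0x0000000001234567	(lui 0x1234;  addiw  0x567)
 *
 * and the base sequence shifts __tmp up, zero-extends __ret, and adds:
 *
 *	(0x01234567 << 32) + zext(0x89abcdef) = 0x0123456789abcdef
 */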
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui %[__ret],0x89abd\n\t"				\
	"lui %[__tmp],0x1234\n\t"				\
	"addiw %[__ret],%[__ret],-0x211\n\t"			\
	"addiw %[__tmp],%[__tmp],0x567\n\t"			\

#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"slli %[__ret],%[__ret],32\n\t"				\
	"srli %[__ret],%[__ret],32\n\t"				\
	"add %[__ret],%[__ret],%[__tmp]\n\t"			\

#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	".option norvc\n\t"					\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	".option norvc\n\t"					\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

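/*
 * Note the nop padding above: every variant fills the same four instruction
 * slots as the base sequence (Zba contributes two instructions plus two
 * nops, Zbkb one instruction plus three nops), since the alternatives
 * mechanism patches the code in place.
 */
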
#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)	\
    && defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE_2(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1			\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#else
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		RISCV_RUNTIME_CONST_64_BASE			\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#endif
#endif

#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})
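
/*
 * A sketch of typical usage, modelled on the generic runtime-const user in
 * fs/dcache.c (the names below are that user's, not part of this header):
 *
 *	static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
 *	{
 *		return runtime_const_ptr(dentry_hashtable) +
 *			runtime_const_shift_right_32(hashlen, d_hash_shift);
 *	}
 *
 * followed at boot, once the real values exist, by
 *
 *	runtime_const_init(ptr, dentry_hashtable);
 *	runtime_const_init(shift, d_hash_shift);
 */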

#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
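
/*
 * The __start_/__stop_ symbols referenced above are generated by the linker
 * for any section whose name is a valid C identifier; the runtime_ptr_* and
 * runtime_shift_* sections emitted by the macros above are expected to be
 * collected by the architecture linker script (see the RUNTIME_CONST()
 * helper in include/asm-generic/vmlinux.lds.h).
 */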

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in instruction bits [31:12].
 * addi holds the lower 12 bits of the immediate in instruction bits [31:20].
 */
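/*
 * Worked example: patching in val = 0x1800 gives lower_immediate =
 * sign_extend32(0x800, 11) = -0x800 and upper_immediate = val -
 * lower_immediate = 0x2000. lui then loads 0x2000 and addi adds its
 * sign-extended immediate: 0x2000 + (-0x800) = 0x1800. This is why
 * upper_immediate must over-shoot by 0x1000 whenever bit 11 of val is set.
 */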
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits of addi */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

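/*
 * The 64-bit preamble interleaves its two lui/addiw pairs: lui for the low
 * half at offset 0 with its addiw at offset 8, and lui for the high half at
 * offset 4 with its addiw at offset 12. That layout is what the
 * (where, where + 8) and (where + 4, where + 12) offsets in the 64-bit
 * branch below encode.
 */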
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}

/*
 * Replace the least significant 5 bits of the srli/srliw immediate that is
 * located at bits 20-24
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

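/*
 * Each section entry was emitted as ".long 1b - .", i.e. the instruction
 * address stored as an offset relative to the entry itself, so adding the
 * stored s32 to the entry's own address recovers the absolute address to
 * patch.
 */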
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */