/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

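/*
 * "Runtime constants" are values that are only known after early boot but
 * never change afterwards.  runtime_const_ptr() and
 * runtime_const_shift_right_32() emit instructions with placeholder
 * immediates and record each patch site in a runtime_ptr_<sym> /
 * runtime_shift_<sym> section; runtime_const_init() later rewrites the
 * placeholders in place once the real value of <sym> is known.
 */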
#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret;						\
	asm_inline(".option push\n\t"					\
		".option norvc\n\t"					\
		"1:\t"							\
		"lui %[__ret],0x89abd\n\t"				\
		"addi %[__ret],%[__ret],-0x211\n\t"			\
		".option pop\n\t"					\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
		".long 1b - .\n\t"					\
		".popsection"						\
		: [__ret] "=r" (__ret));				\
	__ret;								\
})
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into one instruction "add.uw".
 */
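/*
 * The placeholder constant is 0x0123456789abcdef: __ret is patched with the
 * low 32 bits (lui 0x89abd + addiw -0x211 == 0x89abcdef) and __tmp with the
 * high 32 bits (lui 0x1234 + addiw 0x567 == 0x01234567) before the two are
 * combined below.
 */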
#define RISCV_RUNTIME_CONST_64_PREAMBLE					\
	".option push\n\t"						\
	".option norvc\n\t"						\
	"1:\t"								\
	"lui %[__ret],0x89abd\n\t"					\
	"lui %[__tmp],0x1234\n\t"					\
	"addiw %[__ret],%[__ret],-0x211\n\t"				\
	"addiw %[__tmp],%[__tmp],0x567\n\t"				\

#define RISCV_RUNTIME_CONST_64_BASE					\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"slli %[__ret],%[__ret],32\n\t"					\
	"srli %[__ret],%[__ret],32\n\t"					\
	"add %[__ret],%[__ret],%[__tmp]\n\t"				\

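/*
 * Zba: add.uw zero-extends __ret while adding, replacing the slli/srli/add
 * of the base sequence.  The nops keep this alternative the same length as
 * RISCV_RUNTIME_CONST_64_BASE.
 */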
#define RISCV_RUNTIME_CONST_64_ZBA					\
	".option push\n\t"						\
	".option arch,+zba\n\t"						\
	".option norvc\n\t"						\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\

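/*
 * Zbkb: pack concatenates the low 32 bits of __ret (low half) and __tmp
 * (high half) in a single instruction; three nops pad the alternative to
 * the length of RISCV_RUNTIME_CONST_64_BASE.
 */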
#define RISCV_RUNTIME_CONST_64_ZBKB					\
	".option push\n\t"						\
	".option arch,+zbkb\n\t"					\
	".option norvc\n\t"						\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\

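/*
 * Record the patch site in the runtime_ptr_<sym> section as a 32-bit offset
 * from the section entry to the "1:" label above.
 */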
#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)				\
	".option pop\n\t"						\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"			\
	".long 1b - .\n\t"						\
	".popsection"							\

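/*
 * Select at build time which variants the kernel configuration and toolchain
 * allow; ALTERNATIVE() then enables the Zba/Zbkb sequence at boot only on
 * CPUs that implement the extension.
 */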
#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)	\
		&& defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE_2(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1				\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#else
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		RISCV_RUNTIME_CONST_64_BASE				\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#endif
#endif

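/*
 * Shift a 32-bit value right by a runtime-constant amount.  The emitted
 * srli/srliw carries a placeholder shift of 12; __runtime_fixup_shift()
 * rewrites the shift amount once the real value is known.  Illustrative
 * use only (names are hypothetical):
 *
 *	index = runtime_const_shift_right_32(hash, my_shift);
 */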
#define runtime_const_shift_right_32(val, sym)				\
({									\
	u32 __ret;							\
	asm_inline(".option push\n\t"					\
		".option norvc\n\t"					\
		"1:\t"							\
		SRLI " %[__ret],%[__val],12\n\t"			\
		".option pop\n\t"					\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"		\
		".long 1b - .\n\t"					\
		".popsection"						\
		: [__ret] "=r" (__ret)					\
		: [__val] "r" (val));					\
	__ret;								\
})

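/*
 * Patch every site recorded for @sym in the runtime_<type>_<sym> section
 * with the final value of @sym.  Called by the user of the constant once
 * that value is fixed, e.g. (illustrative only, hypothetical symbols):
 *
 *	runtime_const_init(ptr, my_table);
 *	runtime_const_init(shift, my_shift);
 */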
#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in the upper 20 bits of the instruction.
 * addi holds the lower 12 bits of the immediate in the upper 12 bits of the instruction.
 */
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits of addi (the immediate field) */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

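/*
 * Patch a runtime_const_ptr() site.  On 32-bit the site is a lui/addi pair;
 * on 64-bit it is lui/lui/addiw/addiw, with the low 32 bits of the constant
 * in the first and third instructions and the high 32 bits in the second and
 * fourth (see RISCV_RUNTIME_CONST_64_PREAMBLE).
 */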
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}

/*
 * Replace the least significant 5 bits of the srli/srliw immediate that is
 * located at bits 20-24
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

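/*
 * Walk the patch-site table between @start and @end and apply @fn to each
 * site.  Every entry holds a 32-bit offset from the entry itself to the
 * instruction that needs patching.
 */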
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */