1a44fb572SCharlie Jenkins /* SPDX-License-Identifier: GPL-2.0 */
2a44fb572SCharlie Jenkins #ifndef _ASM_RISCV_RUNTIME_CONST_H
3a44fb572SCharlie Jenkins #define _ASM_RISCV_RUNTIME_CONST_H
4a44fb572SCharlie Jenkins
5a44fb572SCharlie Jenkins #include <asm/asm.h>
6a44fb572SCharlie Jenkins #include <asm/alternative.h>
7a44fb572SCharlie Jenkins #include <asm/cacheflush.h>
8a44fb572SCharlie Jenkins #include <asm/insn-def.h>
9a44fb572SCharlie Jenkins #include <linux/memory.h>
10a44fb572SCharlie Jenkins #include <asm/text-patching.h>
11a44fb572SCharlie Jenkins
12a44fb572SCharlie Jenkins #include <linux/uaccess.h>
13a44fb572SCharlie Jenkins
14a44fb572SCharlie Jenkins #ifdef CONFIG_32BIT
/*
 * Load the boot-time-resolved value of 'sym' into a register.
 *
 * Emits a lui/addi pair that initially materializes the placeholder
 * constant 0x89abcdef (0x89abd000 + (-0x211)); runtime_const_init()
 * later patches the real value in via __runtime_fixup_ptr().  The
 * address of label 1: is recorded in section "runtime_ptr_<sym>" as a
 * self-relative offset so the fixup code can find the instructions.
 * ".option norvc" keeps both instructions at a fixed 4-byte width so
 * the patcher's layout assumptions hold.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret;						\
	asm_inline(".option push\n\t"					\
		   ".option norvc\n\t"					\
		   "1:\t"						\
		   "lui %[__ret],0x89abd\n\t"				\
		   "addi %[__ret],%[__ret],-0x211\n\t"			\
		   ".option pop\n\t"					\
		   ".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		   ".long 1b - .\n\t"					\
		   ".popsection"					\
		   : [__ret] "=r" (__ret));				\
	__ret;								\
})
30a44fb572SCharlie Jenkins #else
31a44fb572SCharlie Jenkins /*
32a44fb572SCharlie Jenkins * Loading 64-bit constants into a register from immediates is a non-trivial
33a44fb572SCharlie Jenkins * task on riscv64. To get it somewhat performant, load 32 bits into two
34a44fb572SCharlie Jenkins * different registers and then combine the results.
35a44fb572SCharlie Jenkins *
36a44fb572SCharlie Jenkins * If the processor supports the Zbkb extension, we can combine the final
37a44fb572SCharlie Jenkins * "slli,slli,srli,add" into the single "pack" instruction. If the processor
38a44fb572SCharlie Jenkins * doesn't support Zbkb but does support the Zbb extension, we can
39a44fb572SCharlie Jenkins * combine the final "slli,srli,add" into one instruction "add.uw".
40a44fb572SCharlie Jenkins */
/*
 * Shared head of the 64-bit sequence: materialize the placeholder in
 * two 32-bit halves — the low half (0x89abcdef) in __ret and the high
 * half (0x1234567) in __tmp — via lui/addiw pairs that
 * __runtime_fixup_ptr() patches at boot.  Label 1: marks the start of
 * the patchable region.
 */
#define RISCV_RUNTIME_CONST_64_PREAMBLE					\
	".option push\n\t"						\
	".option norvc\n\t"						\
	"1:\t"								\
	"lui %[__ret],0x89abd\n\t"					\
	"lui %[__tmp],0x1234\n\t"					\
	"addiw %[__ret],%[__ret],-0x211\n\t"				\
	"addiw %[__tmp],%[__tmp],0x567\n\t"				\
49a44fb572SCharlie Jenkins
/*
 * Generic combine step (no bitmanip extensions): shift the high half
 * into bits 63:32 of __tmp, zero-extend the (addiw-sign-extended) low
 * half in __ret with the slli/srli pair, then add the two together.
 */
#define RISCV_RUNTIME_CONST_64_BASE					\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"slli %[__ret],%[__ret],32\n\t"					\
	"srli %[__ret],%[__ret],32\n\t"					\
	"add %[__ret],%[__ret],%[__tmp]\n\t"				\
55a44fb572SCharlie Jenkins
/*
 * Zba combine step: add.uw zero-extends __ret's low 32 bits while
 * adding the shifted high half, replacing the slli/srli/add triple of
 * the base sequence.  The two nops pad this alternative to the same
 * four-instruction length as RISCV_RUNTIME_CONST_64_BASE so it can be
 * patched over it in place.
 */
#define RISCV_RUNTIME_CONST_64_ZBA					\
	".option push\n\t"						\
	".option arch,+zba\n\t"						\
	".option norvc\n\t"						\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\
65a44fb572SCharlie Jenkins
/*
 * Zbkb combine step: a single pack concatenates the low 32 bits of
 * __tmp (high half) above the low 32 bits of __ret (low half).  The
 * three nops pad this alternative to the same four-instruction length
 * as RISCV_RUNTIME_CONST_64_BASE so it can be patched over it in
 * place.
 */
#define RISCV_RUNTIME_CONST_64_ZBKB					\
	".option push\n\t"						\
	".option arch,+zbkb\n\t"					\
	".option norvc\n\t"						\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\
75a44fb572SCharlie Jenkins
/*
 * Shared tail: pop the .option stack and record the address of label
 * 1: (start of the patchable sequence) in section "runtime_ptr_<sym>"
 * as a self-relative offset for runtime_const_init().
 */
#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)				\
	".option pop\n\t"						\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"			\
	".long 1b - .\n\t"						\
	".popsection"							\
81a44fb572SCharlie Jenkins
#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) \
	&& defined(CONFIG_RISCV_ISA_ZBKB)
/*
 * Both Zba and Zbkb enabled in the build: start with the generic
 * combine sequence and let the alternatives framework patch in the
 * add.uw or pack variant depending on what the CPU reports.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		   ALTERNATIVE_2(					\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		   )							\
		   RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		   : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
/* Only Zba available at build time: base sequence, Zba alternative. */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		   ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1			\
		   )							\
		   RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		   : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
/* Only Zbkb available at build time: base sequence, Zbkb alternative. */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		   ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		   )							\
		   RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		   : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;								\
})
#else
/* No bitmanip support in the build: always use the generic sequence. */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		   RISCV_RUNTIME_CONST_64_BASE				\
		   RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		   : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;								\
})
#endif
#endif
139a44fb572SCharlie Jenkins
/*
 * Shift 'val' right by a boot-time-resolved count.  The instruction
 * starts out as a placeholder srli/srliw (SRLI expands per XLEN) by
 * 12; __runtime_fixup_shift() patches bits 24:20 with the real shift
 * amount.  The instruction address is recorded in section
 * "runtime_shift_<sym>" as a self-relative offset.
 */
#define runtime_const_shift_right_32(val, sym)				\
({									\
	u32 __ret;							\
	asm_inline(".option push\n\t"					\
		   ".option norvc\n\t"					\
		   "1:\t"						\
		   SRLI " %[__ret],%[__val],12\n\t"			\
		   ".option pop\n\t"					\
		   ".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		   ".long 1b - .\n\t"					\
		   ".popsection"					\
		   : [__ret] "=r" (__ret)				\
		   : [__val] "r" (val));				\
	__ret;								\
})
155a44fb572SCharlie Jenkins
/*
 * Resolve every runtime_const_*() site recorded for 'sym'.  'type' is
 * "ptr" or "shift"; the __start/__stop symbols are generated by the
 * linker for the corresponding "runtime_<type>_<sym>" section and
 * delimit the array of self-relative instruction offsets emitted by
 * the macros above.
 */
#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
165a44fb572SCharlie Jenkins
__runtime_fixup_caches(void * where,unsigned int insns)166a44fb572SCharlie Jenkins static inline void __runtime_fixup_caches(void *where, unsigned int insns)
167a44fb572SCharlie Jenkins {
168a44fb572SCharlie Jenkins /* On riscv there are currently only cache-wide flushes so va is ignored. */
169a44fb572SCharlie Jenkins __always_unused uintptr_t va = (uintptr_t)where;
170a44fb572SCharlie Jenkins
171a44fb572SCharlie Jenkins flush_icache_range(va, va + 4 * insns);
172a44fb572SCharlie Jenkins }
173a44fb572SCharlie Jenkins
174a44fb572SCharlie Jenkins /*
175a44fb572SCharlie Jenkins * The 32-bit immediate is stored in a lui+addi pairing.
176a44fb572SCharlie Jenkins * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction.
177a44fb572SCharlie Jenkins * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction.
178a44fb572SCharlie Jenkins */
/*
 * Patch one lui/addi (or lui/addiw) pair so it materializes 'val'.
 * The parcels are the little-endian 16-bit halves of each 4-byte
 * instruction; either instruction may be turned into a nop when its
 * contribution is zero.
 */
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out upper 12 bit of addi */
	addi_insn_mask = 0x000fffff;

	/* Reassemble each 32-bit instruction from its 16-bit parcels. */
	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	/*
	 * addi sign-extends its 12-bit immediate, so the lui portion must
	 * compensate: upper_immediate = val - sign_extend(val[11:0]).
	 */
	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	/* text_mutex serializes against other kernel text patchers. */
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}
225a44fb572SCharlie Jenkins
/*
 * Patch a full pointer-sized constant at a runtime_const_ptr() site
 * and flush the icache over the patched range.  On 32-bit a single
 * lui/addi pair (at +0 and +4) holds the value; on 64-bit the offsets
 * match the preamble's lui/lui/addiw/addiw layout: low half at +0/+8,
 * high half at +4/+12.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}
237a44fb572SCharlie Jenkins
238a44fb572SCharlie Jenkins /*
239a44fb572SCharlie Jenkins * Replace the least significant 5 bits of the srli/srliw immediate that is
240a44fb572SCharlie Jenkins * located at bits 20-24
241a44fb572SCharlie Jenkins */
__runtime_fixup_shift(void * where,unsigned long val)242a44fb572SCharlie Jenkins static inline void __runtime_fixup_shift(void *where, unsigned long val)
243a44fb572SCharlie Jenkins {
244a44fb572SCharlie Jenkins __le16 *parcel = where;
245a44fb572SCharlie Jenkins __le32 res;
246a44fb572SCharlie Jenkins u32 insn;
247a44fb572SCharlie Jenkins
248a44fb572SCharlie Jenkins insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
249a44fb572SCharlie Jenkins
250a44fb572SCharlie Jenkins insn &= 0xfe0fffff;
251a44fb572SCharlie Jenkins insn |= (val & 0b11111) << 20;
252a44fb572SCharlie Jenkins
253a44fb572SCharlie Jenkins res = cpu_to_le32(insn);
254a44fb572SCharlie Jenkins mutex_lock(&text_mutex);
255a44fb572SCharlie Jenkins patch_text_nosync(where, &res, sizeof(insn));
256a44fb572SCharlie Jenkins mutex_unlock(&text_mutex);
257a44fb572SCharlie Jenkins }
258a44fb572SCharlie Jenkins
/*
 * Walk the table of self-relative offsets between 'start' and 'end'
 * and invoke 'fn' (one of the __runtime_fixup_* helpers) with 'val'
 * at each recorded instruction sequence.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
267a44fb572SCharlie Jenkins
268a44fb572SCharlie Jenkins #endif /* _ASM_RISCV_RUNTIME_CONST_H */
269