1a44fb572SCharlie Jenkins /* SPDX-License-Identifier: GPL-2.0 */ 2a44fb572SCharlie Jenkins #ifndef _ASM_RISCV_RUNTIME_CONST_H 3a44fb572SCharlie Jenkins #define _ASM_RISCV_RUNTIME_CONST_H 4a44fb572SCharlie Jenkins 5a44fb572SCharlie Jenkins #include <asm/asm.h> 6a44fb572SCharlie Jenkins #include <asm/alternative.h> 7a44fb572SCharlie Jenkins #include <asm/cacheflush.h> 8a44fb572SCharlie Jenkins #include <asm/insn-def.h> 9a44fb572SCharlie Jenkins #include <linux/memory.h> 10a44fb572SCharlie Jenkins #include <asm/text-patching.h> 11a44fb572SCharlie Jenkins 12a44fb572SCharlie Jenkins #include <linux/uaccess.h> 13a44fb572SCharlie Jenkins 14a44fb572SCharlie Jenkins #ifdef CONFIG_32BIT 15a44fb572SCharlie Jenkins #define runtime_const_ptr(sym) \ 16a44fb572SCharlie Jenkins ({ \ 17a44fb572SCharlie Jenkins typeof(sym) __ret; \ 18a44fb572SCharlie Jenkins asm_inline(".option push\n\t" \ 19a44fb572SCharlie Jenkins ".option norvc\n\t" \ 20a44fb572SCharlie Jenkins "1:\t" \ 21a44fb572SCharlie Jenkins "lui %[__ret],0x89abd\n\t" \ 22a44fb572SCharlie Jenkins "addi %[__ret],%[__ret],-0x211\n\t" \ 23a44fb572SCharlie Jenkins ".option pop\n\t" \ 24a44fb572SCharlie Jenkins ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ 25a44fb572SCharlie Jenkins ".long 1b - .\n\t" \ 26a44fb572SCharlie Jenkins ".popsection" \ 27a44fb572SCharlie Jenkins : [__ret] "=r" (__ret)); \ 28a44fb572SCharlie Jenkins __ret; \ 29a44fb572SCharlie Jenkins }) 30a44fb572SCharlie Jenkins #else 31a44fb572SCharlie Jenkins /* 32a44fb572SCharlie Jenkins * Loading 64-bit constants into a register from immediates is a non-trivial 33a44fb572SCharlie Jenkins * task on riscv64. To get it somewhat performant, load 32 bits into two 34a44fb572SCharlie Jenkins * different registers and then combine the results. 35a44fb572SCharlie Jenkins * 36a44fb572SCharlie Jenkins * If the processor supports the Zbkb extension, we can combine the final 37a44fb572SCharlie Jenkins * "slli,slli,srli,add" into the single "pack" instruction. If the processor 38a44fb572SCharlie Jenkins * doesn't support Zbkb but does support the Zbb extension, we can 39a44fb572SCharlie Jenkins * combine the final "slli,srli,add" into one instruction "add.uw". 40a44fb572SCharlie Jenkins */ 41a44fb572SCharlie Jenkins #define RISCV_RUNTIME_CONST_64_PREAMBLE \ 42a44fb572SCharlie Jenkins ".option push\n\t" \ 43a44fb572SCharlie Jenkins ".option norvc\n\t" \ 44a44fb572SCharlie Jenkins "1:\t" \ 45a44fb572SCharlie Jenkins "lui %[__ret],0x89abd\n\t" \ 46a44fb572SCharlie Jenkins "lui %[__tmp],0x1234\n\t" \ 47a44fb572SCharlie Jenkins "addiw %[__ret],%[__ret],-0x211\n\t" \ 48a44fb572SCharlie Jenkins "addiw %[__tmp],%[__tmp],0x567\n\t" \ 49a44fb572SCharlie Jenkins 50a44fb572SCharlie Jenkins #define RISCV_RUNTIME_CONST_64_BASE \ 51a44fb572SCharlie Jenkins "slli %[__tmp],%[__tmp],32\n\t" \ 52a44fb572SCharlie Jenkins "slli %[__ret],%[__ret],32\n\t" \ 53a44fb572SCharlie Jenkins "srli %[__ret],%[__ret],32\n\t" \ 54a44fb572SCharlie Jenkins "add %[__ret],%[__ret],%[__tmp]\n\t" \ 55a44fb572SCharlie Jenkins 56a44fb572SCharlie Jenkins #define RISCV_RUNTIME_CONST_64_ZBA \ 57a44fb572SCharlie Jenkins ".option push\n\t" \ 58a44fb572SCharlie Jenkins ".option arch,+zba\n\t" \ 59*6ee92818SCharlie Jenkins ".option norvc\n\t" \ 60a44fb572SCharlie Jenkins "slli %[__tmp],%[__tmp],32\n\t" \ 61a44fb572SCharlie Jenkins "add.uw %[__ret],%[__ret],%[__tmp]\n\t" \ 62a44fb572SCharlie Jenkins "nop\n\t" \ 63a44fb572SCharlie Jenkins "nop\n\t" \ 64a44fb572SCharlie Jenkins ".option pop\n\t" \ 65a44fb572SCharlie Jenkins 66a44fb572SCharlie Jenkins #define RISCV_RUNTIME_CONST_64_ZBKB \ 67a44fb572SCharlie Jenkins ".option push\n\t" \ 68a44fb572SCharlie Jenkins ".option arch,+zbkb\n\t" \ 69*6ee92818SCharlie Jenkins ".option norvc\n\t" \ 70a44fb572SCharlie Jenkins "pack %[__ret],%[__ret],%[__tmp]\n\t" \ 71a44fb572SCharlie Jenkins "nop\n\t" \ 72a44fb572SCharlie Jenkins "nop\n\t" \ 73a44fb572SCharlie Jenkins "nop\n\t" \ 74a44fb572SCharlie Jenkins ".option pop\n\t" \ 75a44fb572SCharlie Jenkins 76a44fb572SCharlie Jenkins #define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ 77a44fb572SCharlie Jenkins ".option pop\n\t" \ 78a44fb572SCharlie Jenkins ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ 79a44fb572SCharlie Jenkins ".long 1b - .\n\t" \ 80a44fb572SCharlie Jenkins ".popsection" \ 81a44fb572SCharlie Jenkins 828a2f20acSAlexandre Ghiti #if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) \ 838a2f20acSAlexandre Ghiti && defined(CONFIG_RISCV_ISA_ZBKB) 84a44fb572SCharlie Jenkins #define runtime_const_ptr(sym) \ 85a44fb572SCharlie Jenkins ({ \ 86a44fb572SCharlie Jenkins typeof(sym) __ret, __tmp; \ 87a44fb572SCharlie Jenkins asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ 88a44fb572SCharlie Jenkins ALTERNATIVE_2( \ 89a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_BASE, \ 90a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_ZBA, \ 91a44fb572SCharlie Jenkins 0, RISCV_ISA_EXT_ZBA, 1, \ 92a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_ZBKB, \ 93a44fb572SCharlie Jenkins 0, RISCV_ISA_EXT_ZBKB, 1 \ 94a44fb572SCharlie Jenkins ) \ 95a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ 96a44fb572SCharlie Jenkins : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ 97a44fb572SCharlie Jenkins __ret; \ 98a44fb572SCharlie Jenkins }) 998a2f20acSAlexandre Ghiti #elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) 100a44fb572SCharlie Jenkins #define runtime_const_ptr(sym) \ 101a44fb572SCharlie Jenkins ({ \ 102a44fb572SCharlie Jenkins typeof(sym) __ret, __tmp; \ 103a44fb572SCharlie Jenkins asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ 104a44fb572SCharlie Jenkins ALTERNATIVE( \ 105a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_BASE, \ 106a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_ZBA, \ 107a44fb572SCharlie Jenkins 0, RISCV_ISA_EXT_ZBA, 1 \ 108a44fb572SCharlie Jenkins ) \ 109a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ 110a44fb572SCharlie Jenkins : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ 111a44fb572SCharlie Jenkins __ret; \ 112a44fb572SCharlie Jenkins }) 113a44fb572SCharlie Jenkins #elif defined(CONFIG_RISCV_ISA_ZBKB) 114a44fb572SCharlie Jenkins #define runtime_const_ptr(sym) \ 115a44fb572SCharlie Jenkins ({ \ 116a44fb572SCharlie Jenkins typeof(sym) __ret, __tmp; \ 117a44fb572SCharlie Jenkins asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ 118a44fb572SCharlie Jenkins ALTERNATIVE( \ 119a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_BASE, \ 120a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_ZBKB, \ 121a44fb572SCharlie Jenkins 0, RISCV_ISA_EXT_ZBKB, 1 \ 122a44fb572SCharlie Jenkins ) \ 123a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ 124a44fb572SCharlie Jenkins : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ 125a44fb572SCharlie Jenkins __ret; \ 126a44fb572SCharlie Jenkins }) 127a44fb572SCharlie Jenkins #else 128a44fb572SCharlie Jenkins #define runtime_const_ptr(sym) \ 129a44fb572SCharlie Jenkins ({ \ 130a44fb572SCharlie Jenkins typeof(sym) __ret, __tmp; \ 131a44fb572SCharlie Jenkins asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE \ 132a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_BASE \ 133a44fb572SCharlie Jenkins RISCV_RUNTIME_CONST_64_POSTAMBLE(sym) \ 134a44fb572SCharlie Jenkins : [__ret] "=r" (__ret), [__tmp] "=r" (__tmp)); \ 135a44fb572SCharlie Jenkins __ret; \ 136a44fb572SCharlie Jenkins }) 137a44fb572SCharlie Jenkins #endif 138a44fb572SCharlie Jenkins #endif 139a44fb572SCharlie Jenkins 140a44fb572SCharlie Jenkins #define runtime_const_shift_right_32(val, sym) \ 141a44fb572SCharlie Jenkins ({ \ 142a44fb572SCharlie Jenkins u32 __ret; \ 143a44fb572SCharlie Jenkins asm_inline(".option push\n\t" \ 144a44fb572SCharlie Jenkins ".option norvc\n\t" \ 145a44fb572SCharlie Jenkins "1:\t" \ 146a44fb572SCharlie Jenkins SRLI " %[__ret],%[__val],12\n\t" \ 147a44fb572SCharlie Jenkins ".option pop\n\t" \ 148a44fb572SCharlie Jenkins ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ 149a44fb572SCharlie Jenkins ".long 1b - .\n\t" \ 150a44fb572SCharlie Jenkins ".popsection" \ 151a44fb572SCharlie Jenkins : [__ret] "=r" (__ret) \ 152a44fb572SCharlie Jenkins : [__val] "r" (val)); \ 153a44fb572SCharlie Jenkins __ret; \ 154a44fb572SCharlie Jenkins }) 155a44fb572SCharlie Jenkins 156a44fb572SCharlie Jenkins #define runtime_const_init(type, sym) do { \ 157a44fb572SCharlie Jenkins extern s32 __start_runtime_##type##_##sym[]; \ 158a44fb572SCharlie Jenkins extern s32 __stop_runtime_##type##_##sym[]; \ 159a44fb572SCharlie Jenkins \ 160a44fb572SCharlie Jenkins runtime_const_fixup(__runtime_fixup_##type, \ 161a44fb572SCharlie Jenkins (unsigned long)(sym), \ 162a44fb572SCharlie Jenkins __start_runtime_##type##_##sym, \ 163a44fb572SCharlie Jenkins __stop_runtime_##type##_##sym); \ 164a44fb572SCharlie Jenkins } while (0) 165a44fb572SCharlie Jenkins 166a44fb572SCharlie Jenkins static inline void __runtime_fixup_caches(void *where, unsigned int insns) 167a44fb572SCharlie Jenkins { 168a44fb572SCharlie Jenkins /* On riscv there are currently only cache-wide flushes so va is ignored. */ 169a44fb572SCharlie Jenkins __always_unused uintptr_t va = (uintptr_t)where; 170a44fb572SCharlie Jenkins 171a44fb572SCharlie Jenkins flush_icache_range(va, va + 4 * insns); 172a44fb572SCharlie Jenkins } 173a44fb572SCharlie Jenkins 174a44fb572SCharlie Jenkins /* 175a44fb572SCharlie Jenkins * The 32-bit immediate is stored in a lui+addi pairing. 176a44fb572SCharlie Jenkins * lui holds the upper 20 bits of the immediate in the first 20 bits of the instruction. 177a44fb572SCharlie Jenkins * addi holds the lower 12 bits of the immediate in the first 12 bits of the instruction. 178a44fb572SCharlie Jenkins */ 179a44fb572SCharlie Jenkins static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val) 180a44fb572SCharlie Jenkins { 181a44fb572SCharlie Jenkins unsigned int lower_immediate, upper_immediate; 182a44fb572SCharlie Jenkins u32 lui_insn, addi_insn, addi_insn_mask; 183a44fb572SCharlie Jenkins __le32 lui_res, addi_res; 184a44fb572SCharlie Jenkins 185a44fb572SCharlie Jenkins /* Mask out upper 12 bit of addi */ 186a44fb572SCharlie Jenkins addi_insn_mask = 0x000fffff; 187a44fb572SCharlie Jenkins 188a44fb572SCharlie Jenkins lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16; 189a44fb572SCharlie Jenkins addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16; 190a44fb572SCharlie Jenkins 191a44fb572SCharlie Jenkins lower_immediate = sign_extend32(val, 11); 192a44fb572SCharlie Jenkins upper_immediate = (val - lower_immediate); 193a44fb572SCharlie Jenkins 194a44fb572SCharlie Jenkins if (upper_immediate & 0xfffff000) { 195a44fb572SCharlie Jenkins /* replace upper 20 bits of lui with upper immediate */ 196a44fb572SCharlie Jenkins lui_insn &= 0x00000fff; 197a44fb572SCharlie Jenkins lui_insn |= upper_immediate & 0xfffff000; 198a44fb572SCharlie Jenkins } else { 199a44fb572SCharlie Jenkins /* replace lui with nop if immediate is small enough to fit in addi */ 200a44fb572SCharlie Jenkins lui_insn = RISCV_INSN_NOP4; 201a44fb572SCharlie Jenkins /* 202a44fb572SCharlie Jenkins * lui is being skipped, so do a load instead of an add. A load 203a44fb572SCharlie Jenkins * is performed by adding with the x0 register. Setting rs to 204a44fb572SCharlie Jenkins * zero with the following mask will accomplish this goal. 205a44fb572SCharlie Jenkins */ 206a44fb572SCharlie Jenkins addi_insn_mask &= 0x07fff; 207a44fb572SCharlie Jenkins } 208a44fb572SCharlie Jenkins 209a44fb572SCharlie Jenkins if (lower_immediate & 0x00000fff) { 210a44fb572SCharlie Jenkins /* replace upper 12 bits of addi with lower 12 bits of val */ 211a44fb572SCharlie Jenkins addi_insn &= addi_insn_mask; 212a44fb572SCharlie Jenkins addi_insn |= (lower_immediate & 0x00000fff) << 20; 213a44fb572SCharlie Jenkins } else { 214a44fb572SCharlie Jenkins /* replace addi with nop if lower_immediate is empty */ 215a44fb572SCharlie Jenkins addi_insn = RISCV_INSN_NOP4; 216a44fb572SCharlie Jenkins } 217a44fb572SCharlie Jenkins 218a44fb572SCharlie Jenkins addi_res = cpu_to_le32(addi_insn); 219a44fb572SCharlie Jenkins lui_res = cpu_to_le32(lui_insn); 220a44fb572SCharlie Jenkins mutex_lock(&text_mutex); 221a44fb572SCharlie Jenkins patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res)); 222a44fb572SCharlie Jenkins patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res)); 223a44fb572SCharlie Jenkins mutex_unlock(&text_mutex); 224a44fb572SCharlie Jenkins } 225a44fb572SCharlie Jenkins 226a44fb572SCharlie Jenkins static inline void __runtime_fixup_ptr(void *where, unsigned long val) 227a44fb572SCharlie Jenkins { 228a44fb572SCharlie Jenkins #ifdef CONFIG_32BIT 229a44fb572SCharlie Jenkins __runtime_fixup_32(where, where + 4, val); 230a44fb572SCharlie Jenkins __runtime_fixup_caches(where, 2); 231a44fb572SCharlie Jenkins #else 232a44fb572SCharlie Jenkins __runtime_fixup_32(where, where + 8, val); 233a44fb572SCharlie Jenkins __runtime_fixup_32(where + 4, where + 12, val >> 32); 234a44fb572SCharlie Jenkins __runtime_fixup_caches(where, 4); 235a44fb572SCharlie Jenkins #endif 236a44fb572SCharlie Jenkins } 237a44fb572SCharlie Jenkins 238a44fb572SCharlie Jenkins /* 239a44fb572SCharlie Jenkins * Replace the least significant 5 bits of the srli/srliw immediate that is 240a44fb572SCharlie Jenkins * located at bits 20-24 241a44fb572SCharlie Jenkins */ 242a44fb572SCharlie Jenkins static inline void __runtime_fixup_shift(void *where, unsigned long val) 243a44fb572SCharlie Jenkins { 244a44fb572SCharlie Jenkins __le16 *parcel = where; 245a44fb572SCharlie Jenkins __le32 res; 246a44fb572SCharlie Jenkins u32 insn; 247a44fb572SCharlie Jenkins 248a44fb572SCharlie Jenkins insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; 249a44fb572SCharlie Jenkins 250a44fb572SCharlie Jenkins insn &= 0xfe0fffff; 251a44fb572SCharlie Jenkins insn |= (val & 0b11111) << 20; 252a44fb572SCharlie Jenkins 253a44fb572SCharlie Jenkins res = cpu_to_le32(insn); 254a44fb572SCharlie Jenkins mutex_lock(&text_mutex); 255a44fb572SCharlie Jenkins patch_text_nosync(where, &res, sizeof(insn)); 256a44fb572SCharlie Jenkins mutex_unlock(&text_mutex); 257a44fb572SCharlie Jenkins } 258a44fb572SCharlie Jenkins 259a44fb572SCharlie Jenkins static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), 260a44fb572SCharlie Jenkins unsigned long val, s32 *start, s32 *end) 261a44fb572SCharlie Jenkins { 262a44fb572SCharlie Jenkins while (start < end) { 263a44fb572SCharlie Jenkins fn(*start + (void *)start, val); 264a44fb572SCharlie Jenkins start++; 265a44fb572SCharlie Jenkins } 266a44fb572SCharlie Jenkins } 267a44fb572SCharlie Jenkins 268a44fb572SCharlie Jenkins #endif /* _ASM_RISCV_RUNTIME_CONST_H */ 269