/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui %[__ret],0x89abd\n\t"			\
		"addi %[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can combine
 * the final "slli,srli,add" into one instruction, "add.uw".
 */
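/*
 * Worked example of the placeholder encoded below: the preamble builds
 * 0xffffffff89abcdef in __ret (lui 0x89abd; addiw -0x211) and 0x01234567
 * in __tmp (lui 0x1234; addiw 0x567). The BASE, Zba and Zbkb sequences all
 * combine these into the dummy value 0x0123456789abcdef, which
 * __runtime_fixup_ptr() later overwrites with the real constant.
 */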
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui %[__ret],0x89abd\n\t"				\
	"lui %[__tmp],0x1234\n\t"				\
	"addiw %[__ret],%[__ret],-0x211\n\t"			\
	"addiw %[__tmp],%[__tmp],0x567\n\t"			\

#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"slli %[__ret],%[__ret],32\n\t"				\
	"srli %[__ret],%[__ret],32\n\t"				\
	"add %[__ret],%[__ret],%[__tmp]\n\t"			\

#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	".option norvc\n\t"					\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	".option norvc\n\t"					\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA) \
	&& defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE_2(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1			\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#else
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		RISCV_RUNTIME_CONST_64_BASE			\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#endif
#endif

#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})

#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes, so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in the upper 20 bits of the
 * instruction, and addi holds the lower 12 bits of the immediate in the
 * upper 12 bits of the instruction.
 */
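/*
 * Example with the 32-bit placeholder val = 0x89abcdef:
 * lower_immediate = sign_extend32(val, 11) = -0x211 and
 * upper_immediate = val - lower_immediate = 0x89abd000, so lui keeps
 * 0x89abd in its immediate field and addi gets 0xdef, which the CPU
 * sign-extends back to -0x211 and adds to the lui result.
 */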
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits of addi (its immediate field) */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs1 to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}

/*
 * Replace the least significant 5 bits of the srli/srliw immediate that is
 * located at bits 20-24.
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */
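/*
 * Usage sketch (illustrative only; the names example_table, example_shift,
 * example_hash_bucket and example_init are hypothetical): a subsystem reads
 * its constants through runtime_const_ptr()/runtime_const_shift_right_32()
 * and patches every call site once during boot with runtime_const_init():
 *
 *	static struct bucket *example_table __ro_after_init;
 *	static unsigned int example_shift __ro_after_init;
 *
 *	static inline struct bucket *example_hash_bucket(u32 hash)
 *	{
 *		return runtime_const_ptr(example_table) +
 *		       runtime_const_shift_right_32(hash, example_shift);
 *	}
 *
 *	static void __init example_init(void)
 *	{
 *		// ... allocate example_table, compute example_shift ...
 *		runtime_const_init(ptr, example_table);
 *		runtime_const_init(shift, example_shift);
 *	}
 *
 * The runtime_ptr_<sym> and runtime_shift_<sym> sections emitted above must
 * be collected by the linker script so that the __start_runtime_* and
 * __stop_runtime_* symbols referenced by runtime_const_init() resolve.
 */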