/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

/*
 * "Runtime constants" are values known only at boot but used on hot paths
 * as if they were compile-time constants.  Each use site emits a placeholder
 * constant load plus an entry in a per-symbol section recording the address
 * of that load; runtime_const_init() later walks the section and patches
 * the real value directly into the instruction stream.
 */

#ifdef MODULE
#error "Cannot use runtime-const infrastructure from modules"
#endif

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

#ifdef CONFIG_32BIT
/*
 * 32-bit placeholder: lui+addi materializing the dummy value
 * 0x89abd000 - 0x211 = 0x89abcdef.  ".option norvc" forces uncompressed
 * 4-byte encodings so __runtime_fixup_ptr() can find both instructions at
 * fixed offsets.  The PC-relative address of label "1:" is recorded in
 * section runtime_ptr_<sym> for the boot-time fixup pass.
 */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret;						\
	asm_inline(".option push\n\t"					\
		".option norvc\n\t"					\
		"1:\t"							\
		"lui %[__ret],0x89abd\n\t"				\
		"addi %[__ret],%[__ret],-0x211\n\t"			\
		".option pop\n\t"					\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
		".long 1b - .\n\t"					\
		".popsection"						\
		: [__ret] "=r" (__ret));				\
	__ret;								\
})
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into the single instruction "add.uw".
 */

/*
 * Materialize the two placeholder halves:
 *   __ret = 0x89abcdef (low 32 bits), __tmp = 0x01234567 (high 32 bits),
 * i.e. the combined 64-bit placeholder is 0x0123456789abcdef.
 */
#define RISCV_RUNTIME_CONST_64_PREAMBLE					\
	".option push\n\t"						\
	".option norvc\n\t"						\
	"1:\t"								\
	"lui %[__ret],0x89abd\n\t"					\
	"lui %[__tmp],0x1234\n\t"					\
	"addiw %[__ret],%[__ret],-0x211\n\t"				\
	"addiw %[__tmp],%[__tmp],0x567\n\t"				\

/*
 * Baseline combine (no bitmanip extensions): zero-extend __ret to 32 bits
 * via slli/srli and add __tmp shifted into the upper half.  Four
 * instructions; the alternatives below pad with nops to the same length.
 */
#define RISCV_RUNTIME_CONST_64_BASE					\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"slli %[__ret],%[__ret],32\n\t"					\
	"srli %[__ret],%[__ret],32\n\t"					\
	"add %[__ret],%[__ret],%[__tmp]\n\t"				\

/* Zba: add.uw zero-extends __ret itself, saving the slli/srli pair. */
#define RISCV_RUNTIME_CONST_64_ZBA					\
	".option push\n\t"						\
	".option arch,+zba\n\t"						\
	".option norvc\n\t"						\
	"slli %[__tmp],%[__tmp],32\n\t"					\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\

/* Zbkb: pack concatenates the low 32 bits of both halves in one insn. */
#define RISCV_RUNTIME_CONST_64_ZBKB					\
	".option push\n\t"						\
	".option arch,+zbkb\n\t"					\
	".option norvc\n\t"						\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"				\
	"nop\n\t"							\
	"nop\n\t"							\
	"nop\n\t"							\
	".option pop\n\t"						\

/* Record the address of label "1:" for the boot-time fixup pass. */
#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)				\
	".option pop\n\t"						\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"			\
	".long 1b - .\n\t"						\
	".popsection"							\

#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)	\
	&& defined(CONFIG_RISCV_ISA_ZBKB)
/* Both Zba and Zbkb available: pick the best sequence at boot. */
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE_2(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_TOOLCHAIN_HAS_ZBA)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBA,			\
			0, RISCV_ISA_EXT_ZBA, 1				\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		ALTERNATIVE(						\
			RISCV_RUNTIME_CONST_64_BASE,			\
			RISCV_RUNTIME_CONST_64_ZBKB,			\
			0, RISCV_ISA_EXT_ZBKB, 1			\
		)							\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#else
#define runtime_const_ptr(sym)						\
({									\
	typeof(sym) __ret, __tmp;					\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE			\
		RISCV_RUNTIME_CONST_64_BASE				\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));		\
	__ret;								\
})
#endif
#endif

/*
 * Emit "__ret = val >> <shift>" with a placeholder shift amount of 12;
 * the real amount is patched in later by __runtime_fixup_shift() via the
 * runtime_shift_<sym> section entry.
 */
#define runtime_const_shift_right_32(val, sym)				\
({									\
	u32 __ret;							\
	asm_inline(".option push\n\t"					\
		".option norvc\n\t"					\
		"1:\t"							\
		SRLI " %[__ret],%[__val],12\n\t"			\
		".option pop\n\t"					\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"		\
		".long 1b - .\n\t"					\
		".popsection"						\
		: [__ret] "=r" (__ret)					\
		: [__val] "r" (val));					\
	__ret;								\
})

/*
 * Patch every recorded use site of 'sym' of the given type ("ptr" or
 * "shift") with its now-known value.  The __start/__stop symbols delimit
 * the linker-collected section of use-site offset entries.
 */
#define runtime_const_init(type, sym) do {				\
	extern s32 __start_runtime_##type##_##sym[];			\
	extern s32 __stop_runtime_##type##_##sym[];			\
									\
	runtime_const_fixup(__runtime_fixup_##type,			\
			    (unsigned long)(sym),			\
			    __start_runtime_##type##_##sym,		\
			    __stop_runtime_##type##_##sym);		\
} while (0)

/* Make 'insns' patched 4-byte instructions visible to instruction fetch. */
static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing:
 * lui carries bits [31:12] of the immediate in instruction bits [31:12];
 * addi carries bits [11:0] of the immediate in instruction bits [31:20].
 */
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out upper 12 bit of addi */
	addi_insn_mask = 0x000fffff;

	/* Instructions are assembled from two little-endian 16-bit parcels. */
	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	/*
	 * addi sign-extends its 12-bit immediate, so the lui half must
	 * compensate: upper_immediate = val - sign_extend(val[11:0]).
	 */
	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace upper 20 bits of lui with upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so do a load instead of an add. A load
		 * is performed by adding with the x0 register. Setting rs to
		 * zero with the following mask will accomplish this goal.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) {
		/* replace upper 12 bits of addi with lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if lower_immediate is empty */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}

/* Patch a full pointer-sized placeholder at 'where' with 'val'. */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	/* lui at +0, addi at +4 (see the 32-bit runtime_const_ptr). */
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	/*
	 * The 64-bit preamble interleaves the two lui/addiw pairs:
	 * lui(lo) at +0, lui(hi) at +4, addiw(lo) at +8, addiw(hi) at +12.
	 */
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}

/*
 * Replace the least significant 5 bits of the srli/srliw immediate that is
 * located at bits 20-24
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

/*
 * Walk the table of use-site entries; each s32 entry holds the offset of
 * the instruction to patch relative to the entry itself (".long 1b - .").
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */