/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_RUNTIME_CONST_H
#define _ASM_RISCV_RUNTIME_CONST_H

#include <asm/asm.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/insn-def.h>
#include <linux/memory.h>
#include <asm/text-patching.h>

#include <linux/uaccess.h>

#ifdef CONFIG_32BIT
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret;					\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		"lui %[__ret],0x89abd\n\t"			\
		"addi %[__ret],%[__ret],-0x211\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret));			\
	__ret;							\
})
#else
/*
 * Loading 64-bit constants into a register from immediates is a non-trivial
 * task on riscv64. To get it somewhat performant, load 32 bits into two
 * different registers and then combine the results.
 *
 * If the processor supports the Zbkb extension, we can combine the final
 * "slli,slli,srli,add" into the single "pack" instruction. If the processor
 * doesn't support Zbkb but does support the Zba extension, we can
 * combine the final "slli,srli,add" into one instruction "add.uw".
 */
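/*
 * Worked example, using only the placeholder immediates in the macros below
 * ("ret" and "tmp" abbreviate the %[__ret] and %[__tmp] operands): before
 * patching, the preamble materializes the dummy constant 0x0123456789abcdef.
 *
 *	lui	ret,0x89abd	// ret = 0xffffffff89abd000 (lui sign-extends)
 *	lui	tmp,0x1234	// tmp = 0x0000000001234000
 *	addiw	ret,ret,-0x211	// ret = 0xffffffff89abcdef
 *	addiw	tmp,tmp,0x567	// tmp = 0x0000000001234567
 *
 * The base sequence then combines the two halves:
 *
 *	slli	tmp,tmp,32	// tmp = 0x0123456700000000
 *	slli	ret,ret,32	// shift out the sign extension...
 *	srli	ret,ret,32	// ...ret = 0x0000000089abcdef
 *	add	ret,ret,tmp	// ret = 0x0123456789abcdef
 *
 * runtime_const_init() later rewrites the four immediates in place with the
 * real value.
 */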
#define RISCV_RUNTIME_CONST_64_PREAMBLE				\
	".option push\n\t"					\
	".option norvc\n\t"					\
	"1:\t"							\
	"lui %[__ret],0x89abd\n\t"				\
	"lui %[__tmp],0x1234\n\t"				\
	"addiw %[__ret],%[__ret],-0x211\n\t"			\
	"addiw %[__tmp],%[__tmp],0x567\n\t"			\

#define RISCV_RUNTIME_CONST_64_BASE				\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"slli %[__ret],%[__ret],32\n\t"				\
	"srli %[__ret],%[__ret],32\n\t"				\
	"add %[__ret],%[__ret],%[__tmp]\n\t"			\

#define RISCV_RUNTIME_CONST_64_ZBA				\
	".option push\n\t"					\
	".option arch,+zba\n\t"					\
	"slli %[__tmp],%[__tmp],32\n\t"				\
	"add.uw %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_ZBKB				\
	".option push\n\t"					\
	".option arch,+zbkb\n\t"				\
	"pack %[__ret],%[__ret],%[__tmp]\n\t"			\
	"nop\n\t"						\
	"nop\n\t"						\
	"nop\n\t"						\
	".option pop\n\t"					\

#define RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)			\
	".option pop\n\t"					\
	".pushsection runtime_ptr_" #sym ",\"a\"\n\t"		\
	".long 1b - .\n\t"					\
	".popsection"						\

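/*
 * Note that the alternatives mechanism patches these sequences in place, so
 * every variant above is padded with nops to span the same four 4-byte
 * parcels as RISCV_RUNTIME_CONST_64_BASE: two real instructions plus two
 * nops for Zba, and one "pack" plus three nops for Zbkb.
 */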
#if defined(CONFIG_RISCV_ISA_ZBA) && defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE_2(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBA)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBA,		\
			0, RISCV_ISA_EXT_ZBA, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#elif defined(CONFIG_RISCV_ISA_ZBKB)
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		ALTERNATIVE(					\
			RISCV_RUNTIME_CONST_64_BASE,		\
			RISCV_RUNTIME_CONST_64_ZBKB,		\
			0, RISCV_ISA_EXT_ZBKB, 1		\
		)						\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#else
#define runtime_const_ptr(sym)					\
({								\
	typeof(sym) __ret, __tmp;				\
	asm_inline(RISCV_RUNTIME_CONST_64_PREAMBLE		\
		RISCV_RUNTIME_CONST_64_BASE			\
		RISCV_RUNTIME_CONST_64_POSTAMBLE(sym)		\
		: [__ret] "=r" (__ret), [__tmp] "=r" (__tmp));	\
	__ret;							\
})
#endif
#endif

#define runtime_const_shift_right_32(val, sym)			\
({								\
	u32 __ret;						\
	asm_inline(".option push\n\t"				\
		".option norvc\n\t"				\
		"1:\t"						\
		SRLI " %[__ret],%[__val],12\n\t"		\
		".option pop\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		: [__ret] "=r" (__ret)				\
		: [__val] "r" (val));				\
	__ret;							\
})

#define runtime_const_init(type, sym) do {			\
	extern s32 __start_runtime_##type##_##sym[];		\
	extern s32 __stop_runtime_##type##_##sym[];		\
								\
	runtime_const_fixup(__runtime_fixup_##type,		\
			    (unsigned long)(sym),		\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
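
/*
 * Example usage (an illustrative sketch, not code from this file; the names
 * "bucket", my_hashtable, my_shift and my_hash are hypothetical, though
 * fs/dcache.c follows the same pattern for its dentry hash):
 *
 *	static struct bucket *my_hashtable;
 *	static unsigned int my_shift;
 *
 *	// Hot path: both constants become immediates patched into the text.
 *	static inline struct bucket *my_hash(unsigned int hash)
 *	{
 *		return runtime_const_ptr(my_hashtable) +
 *		       runtime_const_shift_right_32(hash, my_shift);
 *	}
 *
 *	// Boot path, once the final values are known:
 *	runtime_const_init(ptr, my_hashtable);
 *	runtime_const_init(shift, my_shift);
 */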

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	/* On riscv there are currently only cache-wide flushes, so va is ignored. */
	__always_unused uintptr_t va = (uintptr_t)where;

	flush_icache_range(va, va + 4 * insns);
}

/*
 * The 32-bit immediate is stored in a lui+addi pairing.
 * lui holds the upper 20 bits of the immediate in the upper 20 bits of the
 * instruction word; addi holds the lower 12 bits of the immediate in the
 * upper 12 bits of the instruction word.
 */
static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, unsigned int val)
{
	unsigned int lower_immediate, upper_immediate;
	u32 lui_insn, addi_insn, addi_insn_mask;
	__le32 lui_res, addi_res;

	/* Mask out the upper 12 bits (the immediate field) of addi */
	addi_insn_mask = 0x000fffff;

	lui_insn = (u32)le16_to_cpu(lui_parcel[0]) | (u32)le16_to_cpu(lui_parcel[1]) << 16;
	addi_insn = (u32)le16_to_cpu(addi_parcel[0]) | (u32)le16_to_cpu(addi_parcel[1]) << 16;

	lower_immediate = sign_extend32(val, 11);
	upper_immediate = (val - lower_immediate);

	if (upper_immediate & 0xfffff000) {
		/* replace the upper 20 bits of lui with the upper immediate */
		lui_insn &= 0x00000fff;
		lui_insn |= upper_immediate & 0xfffff000;
	} else {
		/* replace lui with nop if the immediate is small enough to fit in addi */
		lui_insn = RISCV_INSN_NOP4;
		/*
		 * lui is being skipped, so perform a load immediate instead
		 * of an add: adding against the x0 register yields the
		 * immediate itself. Setting rs1 to zero with the following
		 * mask accomplishes this.
		 */
		addi_insn_mask &= 0x07fff;
	}

	if (lower_immediate & 0x00000fff) {
		/* replace the upper 12 bits of addi with the lower 12 bits of val */
		addi_insn &= addi_insn_mask;
		addi_insn |= (lower_immediate & 0x00000fff) << 20;
	} else {
		/* replace addi with nop if the lower immediate is zero */
		addi_insn = RISCV_INSN_NOP4;
	}

	addi_res = cpu_to_le32(addi_insn);
	lui_res = cpu_to_le32(lui_insn);
	mutex_lock(&text_mutex);
	patch_insn_write(addi_parcel, &addi_res, sizeof(addi_res));
	patch_insn_write(lui_parcel, &lui_res, sizeof(lui_res));
	mutex_unlock(&text_mutex);
}
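
/*
 * Worked example for __runtime_fixup_32() (values chosen to match the
 * placeholder constant above, not a separate code path): for
 * val = 0x89abcdef,
 *
 *	lower_immediate = sign_extend32(0xdef, 11) = -0x211
 *	upper_immediate = 0x89abcdef - (-0x211)    = 0x89abd000
 *
 * so the pair is patched to "lui rd,0x89abd; addi rd,rd,-0x211": the sign
 * extension of the addi immediate cancels against the biased lui value.
 */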

static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
#ifdef CONFIG_32BIT
	__runtime_fixup_32(where, where + 4, val);
	__runtime_fixup_caches(where, 2);
#else
	__runtime_fixup_32(where, where + 8, val);
	__runtime_fixup_32(where + 4, where + 12, val >> 32);
	__runtime_fixup_caches(where, 4);
#endif
}

/*
 * Replace the least significant 5 bits of the srli/srliw immediate, which is
 * located at bits 20-24 of the instruction.
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le16 *parcel = where;
	__le32 res;
	u32 insn;

	insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;

	insn &= 0xfe0fffff;
	insn |= (val & 0b11111) << 20;

	res = cpu_to_le32(insn);
	mutex_lock(&text_mutex);
	patch_text_nosync(where, &res, sizeof(insn));
	mutex_unlock(&text_mutex);
}

static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif /* _ASM_RISCV_RUNTIME_CONST_H */