1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Kernel Probes Jump Optimization (Optprobes) 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004 6 * Copyright (C) Hitachi Ltd., 2012 7 * Copyright (C) Huawei Inc., 2014 8 */ 9 10 #include <linux/kprobes.h> 11 #include <linux/jump_label.h> 12 #include <asm/kprobes.h> 13 #include <asm/cacheflush.h> 14 /* for arm_gen_branch */ 15 #include <asm/insn.h> 16 /* for patch_text */ 17 #include <asm/text-patching.h> 18 19 #include "core.h" 20 21 /* 22 * See register_usage_flags. If the probed instruction doesn't use PC, 23 * we can copy it into template and have it executed directly without 24 * simulation or emulation. 25 */ 26 #define ARM_REG_PC 15 27 #define can_kprobe_direct_exec(m) (!test_bit(ARM_REG_PC, &(m))) 28 29 /* 30 * NOTE: the first sub and add instruction will be modified according 31 * to the stack cost of the instruction. 32 */ 33 asm ( 34 ".global optprobe_template_entry\n" 35 "optprobe_template_entry:\n" 36 ".global optprobe_template_sub_sp\n" 37 "optprobe_template_sub_sp:" 38 " sub sp, sp, #0xff\n" 39 " stmia sp, {r0 - r14} \n" 40 ".global optprobe_template_add_sp\n" 41 "optprobe_template_add_sp:" 42 " add r3, sp, #0xff\n" 43 " str r3, [sp, #52]\n" 44 " mrs r4, cpsr\n" 45 " str r4, [sp, #64]\n" 46 " mov r1, sp\n" 47 " ldr r0, 1f\n" 48 " ldr r2, 2f\n" 49 /* 50 * AEABI requires an 8-bytes alignment stack. If 51 * SP % 8 != 0 (SP % 4 == 0 should be ensured), 52 * alloc more bytes here. 53 */ 54 " and r4, sp, #4\n" 55 " sub sp, sp, r4\n" 56 #if __LINUX_ARM_ARCH__ >= 5 57 " blx r2\n" 58 #else 59 " mov lr, pc\n" 60 " mov pc, r2\n" 61 #endif 62 " add sp, sp, r4\n" 63 " ldr r1, [sp, #64]\n" 64 " tst r1, #"__stringify(PSR_T_BIT)"\n" 65 " ldrne r2, [sp, #60]\n" 66 " orrne r2, #1\n" 67 " strne r2, [sp, #60] @ set bit0 of PC for thumb\n" 68 " msr cpsr_cxsf, r1\n" 69 ".global optprobe_template_restore_begin\n" 70 "optprobe_template_restore_begin:\n" 71 " ldmia sp, {r0 - r15}\n" 72 ".global optprobe_template_restore_orig_insn\n" 73 "optprobe_template_restore_orig_insn:\n" 74 " nop\n" 75 ".global optprobe_template_restore_end\n" 76 "optprobe_template_restore_end:\n" 77 " nop\n" 78 ".global optprobe_template_val\n" 79 "optprobe_template_val:\n" 80 "1: .long 0\n" 81 ".global optprobe_template_call\n" 82 "optprobe_template_call:\n" 83 "2: .long 0\n" 84 ".global optprobe_template_end\n" 85 "optprobe_template_end:\n"); 86 87 #define TMPL_VAL_IDX \ 88 ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) 89 #define TMPL_CALL_IDX \ 90 ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) 91 #define TMPL_END_IDX \ 92 ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) 93 #define TMPL_ADD_SP \ 94 ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) 95 #define TMPL_SUB_SP \ 96 ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) 97 #define TMPL_RESTORE_BEGIN \ 98 ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) 99 #define TMPL_RESTORE_ORIGN_INSN \ 100 ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) 101 #define TMPL_RESTORE_END \ 102 ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) 103 104 /* 105 * ARM can always optimize an instruction when using ARM ISA, except 106 * instructions like 'str r0, [sp, r1]' which store to stack and unable 107 * to determine stack space consumption statically. 108 */ 109 int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) 110 { 111 return optinsn->insn != NULL; 112 } 113 114 /* 115 * In ARM ISA, kprobe opt always replace one instruction (4 bytes 116 * aligned and 4 bytes long). It is impossible to encounter another 117 * kprobe in the address range. So always return 0. 118 */ 119 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 120 { 121 return 0; 122 } 123 124 /* Caller must ensure addr & 3 == 0 */ 125 static int can_optimize(struct kprobe *kp) 126 { 127 if (kp->ainsn.stack_space < 0) 128 return 0; 129 /* 130 * 255 is the biggest imm can be used in 'sub r0, r0, #<imm>'. 131 * Number larger than 255 needs special encoding. 132 */ 133 if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs)) 134 return 0; 135 return 1; 136 } 137 138 /* Free optimized instruction slot */ 139 static void 140 __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 141 { 142 if (op->optinsn.insn) { 143 free_optinsn_slot(op->optinsn.insn, dirty); 144 op->optinsn.insn = NULL; 145 } 146 } 147 148 static void 149 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 150 { 151 unsigned long flags; 152 struct kprobe *p = &op->kp; 153 struct kprobe_ctlblk *kcb; 154 155 /* Save skipped registers */ 156 regs->ARM_pc = (unsigned long)op->kp.addr; 157 regs->ARM_ORIG_r0 = ~0UL; 158 159 local_irq_save(flags); 160 kcb = get_kprobe_ctlblk(); 161 162 if (kprobe_running()) { 163 kprobes_inc_nmissed_count(&op->kp); 164 } else { 165 __this_cpu_write(current_kprobe, &op->kp); 166 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 167 opt_pre_handler(&op->kp, regs); 168 __this_cpu_write(current_kprobe, NULL); 169 } 170 171 /* 172 * We singlestep the replaced instruction only when it can't be 173 * executed directly during restore. 174 */ 175 if (!p->ainsn.kprobe_direct_exec) 176 op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); 177 178 local_irq_restore(flags); 179 } 180 NOKPROBE_SYMBOL(optimized_callback) 181 182 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) 183 { 184 kprobe_opcode_t *code; 185 unsigned long rel_chk; 186 unsigned long val; 187 unsigned long stack_protect = sizeof(struct pt_regs); 188 189 if (!can_optimize(orig)) 190 return -EILSEQ; 191 192 code = get_optinsn_slot(); 193 if (!code) 194 return -ENOMEM; 195 196 /* 197 * Verify if the address gap is in 32MiB range, because this uses 198 * a relative jump. 199 * 200 * kprobe opt use a 'b' instruction to branch to optinsn.insn. 201 * According to ARM manual, branch instruction is: 202 * 203 * 31 28 27 24 23 0 204 * +------+---+---+---+---+----------------+ 205 * | cond | 1 | 0 | 1 | 0 | imm24 | 206 * +------+---+---+---+---+----------------+ 207 * 208 * imm24 is a signed 24 bits integer. The real branch offset is computed 209 * by: imm32 = SignExtend(imm24:'00', 32); 210 * 211 * So the maximum forward branch should be: 212 * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc 213 * The maximum backword branch should be: 214 * (0xff800000 << 2) = 0xfe000000 = -0x2000000 215 * 216 * We can simply check (rel & 0xfe000003): 217 * if rel is positive, (rel & 0xfe000000) shoule be 0 218 * if rel is negitive, (rel & 0xfe000000) should be 0xfe000000 219 * the last '3' is used for alignment checking. 220 */ 221 rel_chk = (unsigned long)((long)code - 222 (long)orig->addr + 8) & 0xfe000003; 223 224 if ((rel_chk != 0) && (rel_chk != 0xfe000000)) { 225 /* 226 * Different from x86, we free code buf directly instead of 227 * calling __arch_remove_optimized_kprobe() because 228 * we have not fill any field in op. 229 */ 230 free_optinsn_slot(code, 0); 231 return -ERANGE; 232 } 233 234 /* Copy arch-dep-instance from template. */ 235 memcpy(code, (unsigned long *)optprobe_template_entry, 236 TMPL_END_IDX * sizeof(kprobe_opcode_t)); 237 238 /* Adjust buffer according to instruction. */ 239 BUG_ON(orig->ainsn.stack_space < 0); 240 241 stack_protect += orig->ainsn.stack_space; 242 243 /* Should have been filtered by can_optimize(). */ 244 BUG_ON(stack_protect > 255); 245 246 /* Create a 'sub sp, sp, #<stack_protect>' */ 247 code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect); 248 /* Create a 'add r3, sp, #<stack_protect>' */ 249 code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect); 250 251 /* Set probe information */ 252 val = (unsigned long)op; 253 code[TMPL_VAL_IDX] = val; 254 255 /* Set probe function call */ 256 val = (unsigned long)optimized_callback; 257 code[TMPL_CALL_IDX] = val; 258 259 /* If possible, copy insn and have it executed during restore */ 260 orig->ainsn.kprobe_direct_exec = false; 261 if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) { 262 kprobe_opcode_t final_branch = arm_gen_branch( 263 (unsigned long)(&code[TMPL_RESTORE_END]), 264 (unsigned long)(op->kp.addr) + 4); 265 if (final_branch != 0) { 266 /* 267 * Replace original 'ldmia sp, {r0 - r15}' with 268 * 'ldmia {r0 - r14}', restore all registers except pc. 269 */ 270 code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff); 271 272 /* The original probed instruction */ 273 code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode); 274 275 /* Jump back to next instruction */ 276 code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch); 277 orig->ainsn.kprobe_direct_exec = true; 278 } 279 } 280 281 flush_icache_range((unsigned long)code, 282 (unsigned long)(&code[TMPL_END_IDX])); 283 284 /* Set op->optinsn.insn means prepared. */ 285 op->optinsn.insn = code; 286 return 0; 287 } 288 289 void __kprobes arch_optimize_kprobes(struct list_head *oplist) 290 { 291 struct optimized_kprobe *op, *tmp; 292 293 list_for_each_entry_safe(op, tmp, oplist, list) { 294 unsigned long insn; 295 WARN_ON(kprobe_disabled(&op->kp)); 296 297 /* 298 * Backup instructions which will be replaced 299 * by jump address 300 */ 301 memcpy(op->optinsn.copied_insn, op->kp.addr, 302 RELATIVEJUMP_SIZE); 303 304 insn = arm_gen_branch((unsigned long)op->kp.addr, 305 (unsigned long)op->optinsn.insn); 306 BUG_ON(insn == 0); 307 308 /* 309 * Make it a conditional branch if replaced insn 310 * is consitional 311 */ 312 insn = (__mem_to_opcode_arm( 313 op->optinsn.copied_insn[0]) & 0xf0000000) | 314 (insn & 0x0fffffff); 315 316 /* 317 * Similar to __arch_disarm_kprobe, operations which 318 * removing breakpoints must be wrapped by stop_machine 319 * to avoid racing. 320 */ 321 kprobes_remove_breakpoint(op->kp.addr, insn); 322 323 list_del_init(&op->list); 324 } 325 } 326 327 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 328 { 329 arch_arm_kprobe(&op->kp); 330 } 331 332 /* 333 * Recover original instructions and breakpoints from relative jumps. 334 * Caller must call with locking kprobe_mutex. 335 */ 336 void arch_unoptimize_kprobes(struct list_head *oplist, 337 struct list_head *done_list) 338 { 339 struct optimized_kprobe *op, *tmp; 340 341 list_for_each_entry_safe(op, tmp, oplist, list) { 342 arch_unoptimize_kprobe(op); 343 list_move(&op->list, done_list); 344 } 345 } 346 347 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 348 kprobe_opcode_t *addr) 349 { 350 return (op->kp.addr <= addr && 351 op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr); 352 353 } 354 355 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 356 { 357 __arch_remove_optimized_kprobe(op, 1); 358 } 359