// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe is jump-optimized, the original bytes may have been
	 * overwritten by the relative-jump destination address. In this case,
	 * the original bytes must be recovered from the
	 * op->optinsn.copied_insn buffer.
	 */
	if (probe_kernel_read(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

/*
 * Trampoline template. A copy of this is made for each optimized kprobe:
 * the NOP slots below are filled with a "mov $op, %arg1" and a call to
 * optimized_callback(), and the relocated original instructions plus a
 * jump back into the probed function are appended after
 * optprobe_template_end.
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags into esp */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->cs |= get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * A jump to an x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with an
	 * older gcc may still use indirect jumps, so we add this check
	 * instead of replacing the indirect-jump check.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup the symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr <  (unsigned long)__entry_text_end)) ||
	    ((paddr >= (unsigned long)__irqentry_text_start) &&
	     (paddr <  (unsigned long)__irqentry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code will jump into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that no instruction jumps into the target range */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free the optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that the jump will replace.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf = NULL, *slot;
	int ret, len;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	op->optinsn.insn = slot = get_optinsn_slot();
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Verify that the address gap is within the 2GB range, because
	 * this uses a relative jump.
	 */
	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
	if (abs(rel) > 0x7fffffff) {
		ret = -ERANGE;
		goto err;
	}

	/* Copy the arch-dependent instance from the template */
	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
					  slot + TMPL_END_IDX);
	if (ret < 0)
		goto err;
	op->optinsn.size = ret;
	len = TMPL_END_IDX + op->optinsn.size;

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX,
			   slot + TMPL_CALL_IDX, optimized_callback);

	/* Set the returning jmp instruction at the tail of the out-of-line buffer */
	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
	len += RELATIVEJUMP_SIZE;

	/* We have to use text_poke() for the instruction buffer because it is RO */
	text_poke(slot, buf, len);
	ret = 0;
out:
	kfree(buf);
	return ret;

err:
	__arch_remove_optimized_kprobe(op, 0);
	goto out;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must call this with kprobe_mutex and text_mutex held.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buff[RELATIVEJUMP_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup the instructions which will be replaced by the jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
		       RELATIVE_ADDR_SIZE);

		insn_buff[0] = RELATIVEJUMP_OPCODE;
		*(s32 *)(&insn_buff[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
			     op->optinsn.insn);

		list_del_init(&op->list);
	}
}

/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 insn_buff[RELATIVEJUMP_SIZE];

	/* Set int3 in the first byte for kprobes */
	insn_buff[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
		     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must call this with kprobe_mutex held.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through the copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);