// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Kernel Probes (KProbes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Oct	Jim Keniston <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> adapted for x86_64 from i386.
 * 2005-Mar	Roland McGrath <roland@redhat.com>
 *		Fixed to handle %rip-relative addressing mode correctly.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 * 2005-May	Rusty Lynch <rusty.lynch@intel.com>
 *		Added function return probes functionality
 * 2006-Feb	Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
 *		kprobe-booster and kretprobe-booster for i386.
 * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
 *		and kretprobe-booster for x86-64
 * 2007-Dec	Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
 *		<arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
 *		unified x86 kprobes code.
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/sched/debug.h>
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/objtool.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>

#include "common.h"

DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

#define stack_addr(regs) ((unsigned long *)regs->sp)

#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
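/*
 * Illustrative note (not part of the original sources): each W() row packs
 * 16 boostability bits, and the "% 32" shift means two consecutive rows
 * share one 32-bit word of the table below.  For example,
 * W(0x40, 1,1,...,1) ORed with W(0x50, 0,0,...,0) yields
 * twobyte_is_boostable[2] == 0x0000ffff, so test_bit(0x47, ...) is set and
 * the two-byte opcode 0x0f 0x47 (cmova) is treated as boostable, while no
 * 0x5x opcode is.
 */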
/*
 * Undefined/reserved opcodes, conditional jumps, Opcode Extension
 * Groups, and some special opcodes can not be boosted.
 * This is non-const and volatile to keep gcc from statically
 * optimizing it out, as variable_test_bit makes gcc think only
 * *(unsigned long*) is used.
 */
static volatile u32 twobyte_is_boostable[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
	/*      ----------------------------------------------          */
	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
	W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
	W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
	W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
	W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0)   /* f0 */
	/*      -----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
};
#undef W

struct kretprobe_blackpoint kretprobe_blacklist[] = {
	{"__switch_to", }, /* This function switches only the current task,
			      but doesn't switch the kernel stack. */
	{NULL, NULL}	/* Terminator */
};

const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);

static nokprobe_inline void
__synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
{
	struct __arch_relative_insn {
		u8 op;
		s32 raddr;
	} __packed *insn;

	insn = (struct __arch_relative_insn *)dest;
	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
	insn->op = op;
}

/* Insert a jump instruction at address 'from', which jumps to address 'to'. */
void synthesize_reljump(void *dest, void *from, void *to)
{
	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_reljump);

/* Insert a call instruction at address 'from', which calls address 'to'. */
void synthesize_relcall(void *dest, void *from, void *to)
{
	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_relcall);
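/*
 * Illustrative note (not part of the original sources): both helpers emit
 * the 5-byte rel32 form, an opcode byte followed by a signed 32-bit
 * displacement relative to the end of the instruction at 'from'.  For
 * example, with from == 0xffffffff81000100 and to == 0xffffffff81000200,
 * raddr = to - (from + 5) = 0xfb, so synthesize_reljump() stores the bytes
 * e9 fb 00 00 00 at 'dest'.
 */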
/*
 * Returns non-zero if INSN is boostable.
 * RIP-relative instructions are adjusted at copying time in 64-bit mode.
 */
int can_boost(struct insn *insn, void *addr)
{
	kprobe_opcode_t opcode;

	if (search_exception_tables((unsigned long)addr))
		return 0;	/* Page fault may occur on this address. */

	/* 2nd-byte opcode */
	if (insn->opcode.nbytes == 2)
		return test_bit(insn->opcode.bytes[1],
				(unsigned long *)twobyte_is_boostable);

	if (insn->opcode.nbytes != 1)
		return 0;

	/* Can't boost the Address-size override prefix */
	if (unlikely(inat_is_address_size_prefix(insn->attr)))
		return 0;

	opcode = insn->opcode.bytes[0];

	switch (opcode & 0xf0) {
	case 0x60:
		/* can't boost "bound" */
		return (opcode != 0x62);
	case 0x70:
		return 0; /* can't boost conditional jump */
	case 0x90:
		return opcode != 0x9a;	/* can't boost call far */
	case 0xc0:
		/* can't boost software interrupts */
		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
	case 0xd0:
		/* can boost AA* and XLAT */
		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
	case 0xe0:
		/* can boost in/out and absolute jmps */
		return ((opcode & 0x04) || opcode == 0xea);
	case 0xf0:
		/* clear and set flags are boostable */
		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
	default:
		/* CS override prefix and call are not boostable */
		return (opcode != 0x2e && opcode != 0x9a);
	}
}
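/*
 * Illustrative note (not part of the original sources): a few concrete
 * cases of the switch above.  A single-byte nop (0x90) hits the 0x90 case
 * and is boostable because it is not the far call 0x9a; int3 (0xcc) hits
 * the 0xc0 case and is rejected as a software interrupt.  Two-byte opcodes
 * such as cmova (0x0f 0x47) never reach the switch; they are decided
 * earlier by the twobyte_is_boostable bitmap.
 */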
static unsigned long
__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct kprobe *kp;
	unsigned long faddr;

	kp = get_kprobe((void *)addr);
	faddr = ftrace_location(addr);
	/*
	 * Addresses inside the ftrace location are refused by
	 * arch_check_ftrace_location(). Something went terribly wrong
	 * if such an address is checked here.
	 */
	if (WARN_ON(faddr && faddr != addr))
		return 0UL;
	/*
	 * Use the current code if it is not modified by Kprobe
	 * and it cannot be modified by ftrace.
	 */
	if (!kp && !faddr)
		return addr;

	/*
	 * Basically, kp->ainsn.insn has the original instruction.
	 * However, a RIP-relative instruction cannot be single-stepped at a
	 * different place, so __copy_instruction() tweaks the displacement of
	 * that instruction. In that case, we can't recover the instruction
	 * from kp->ainsn.insn.
	 *
	 * On the other hand, in case of a normal Kprobe, kp->opcode has a copy
	 * of the first byte of the probed instruction, which is overwritten
	 * by int3. And since the instruction at kp->addr is not modified by
	 * kprobes except for the first byte, we can recover the original
	 * instruction from it and kp->opcode.
	 *
	 * In case of Kprobes using ftrace, we do not have a copy of
	 * the original instruction. In fact, the ftrace location might
	 * be modified at any time and could even be in an inconsistent state.
	 * Fortunately, we know that the original code is the ideal 5-byte
	 * long NOP.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (faddr)
		memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
	else
		buf[0] = kp->opcode;
	return (unsigned long)buf;
}

/*
 * Recover the probed instruction at addr for further analysis.
 * Caller must hold kprobe_mutex, or disable preemption to prevent the
 * referenced kprobes from being released.
 * Returns zero if the instruction cannot be recovered (or access failed).
 */
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
	unsigned long __addr;

	__addr = __recover_optprobed_insn(buf, addr);
	if (__addr != addr)
		return __addr;

	return __recover_probed_insn(buf, addr);
}

/* Check if paddr is at an instruction boundary */
static int can_probe(unsigned long paddr)
{
	unsigned long addr, __addr, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr) {
		/*
		 * Check if the instruction has been modified by another
		 * kprobe, in which case we replace the breakpoint by the
		 * original instruction in our buffer.
		 * Also, jump optimization will change the breakpoint to a
		 * relative-jump. Since the relative-jump itself is
		 * normally used, we just go through if there is no kprobe.
		 */
		__addr = recover_probed_instruction(buf, addr);
		if (!__addr)
			return 0;
		kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
		insn_get_length(&insn);

		/*
		 * Another debugging subsystem might insert this breakpoint.
		 * In that case, we can't recover it.
		 */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
			return 0;
		addr += insn.length;
	}

	return (addr == paddr);
}

/*
 * Copy an instruction, recovering it if it has been modified by kprobes,
 * and adjust the displacement if the instruction uses the %rip-relative
 * addressing mode. Note that since @real will be the final place of the
 * copied instruction, the displacement must be adjusted based on @real,
 * not @dest.
 * This returns the length of the copied instruction, or 0 on error.
 */
int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
	kprobe_opcode_t buf[MAX_INSN_SIZE];
	unsigned long recovered_insn =
		recover_probed_instruction(buf, (unsigned long)src);

	if (!recovered_insn || !insn)
		return 0;

	/* This can access kernel text if the given address is not recovered */
	if (copy_from_kernel_nofault(dest, (void *)recovered_insn,
			MAX_INSN_SIZE))
		return 0;

	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
	insn_get_length(insn);

	/* We cannot probe an instruction that has an emulation prefix */
	if (insn_has_emulate_prefix(insn))
		return 0;

	/* Another subsystem put a breakpoint here; we failed to recover it */
	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
		return 0;

	/* We should not singlestep on the exception masking instructions */
	if (insn_masking_exception(insn))
		return 0;

#ifdef CONFIG_X86_64
	/* Only x86_64 has RIP relative instructions */
	if (insn_rip_relative(insn)) {
		s64 newdisp;
		u8 *disp;
		/*
		 * The copied instruction uses the %rip-relative addressing
		 * mode. Adjust the displacement for the difference between
		 * the original location of this instruction and the location
		 * of the copy that will actually be run. The tricky bit here
		 * is making sure that the sign extension happens correctly in
		 * this calculation, since we need a signed 32-bit result to
		 * be sign-extended to 64 bits when it's added to the %rip
		 * value and yield the same 64-bit result that the sign-
		 * extension of the original signed 32-bit displacement would
		 * have given.
		 */
		newdisp = (u8 *) src + (s64) insn->displacement.value
			  - (u8 *) real;
		if ((s64) (s32) newdisp != newdisp) {
			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
			return 0;
		}
		disp = (u8 *) dest + insn_offset_displacement(insn);
		*(s32 *) disp = (s32) newdisp;
	}
#endif
	return insn->length;
}
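/*
 * Illustrative note (not part of the original sources): the displacement
 * fixup above follows from "target = next_rip + disp".  The original
 * instruction at @src reaches src + len + disp; the copy at @real must
 * reach the same target, i.e. real + len + newdisp, so
 * newdisp = src + disp - real (the instruction length cancels out).
 * E.g. if src = real + 0x1000, a copied "mov 0x10(%rip),%rax" needs
 * newdisp = 0x10 + 0x1000 = 0x1010 to keep referencing the same byte.
 */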
/* Prepare a reljump right after the instruction to boost */
static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
			  struct insn *insn)
{
	int len = insn->length;

	if (can_boost(insn, p->addr) &&
	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
		/*
		 * These instructions can be executed directly if they
		 * jump back to the correct address.
		 */
		synthesize_reljump(buf + len, p->ainsn.insn + len,
				   p->addr + insn->length);
		len += JMP32_INSN_SIZE;
		p->ainsn.boostable = 1;
	} else {
		p->ainsn.boostable = 0;
	}

	return len;
}

/* Make the page read-only when allocating it */
void *alloc_insn_page(void)
{
	void *page;

	page = module_alloc(PAGE_SIZE);
	if (!page)
		return NULL;

	set_vm_flush_reset_perms(page);
	/*
	 * First make the page read-only, and only then make it executable to
	 * prevent it from being W+X in between.
	 */
	set_memory_ro((unsigned long)page, 1);

	/*
	 * TODO: Once additional kernel code protection mechanisms are set, ensure
	 * that the page was not maliciously altered and it is still zeroed.
	 */
	set_memory_x((unsigned long)page, 1);

	return page;
}

/* Restore the page to RW mode before releasing it */
void free_insn_page(void *page)
{
	module_memfree(page);
}

static void set_resume_flags(struct kprobe *p, struct insn *insn)
{
	insn_byte_t opcode = insn->opcode.bytes[0];

	switch (opcode) {
	case 0xfa:		/* cli */
	case 0xfb:		/* sti */
	case 0x9d:		/* popf/popfd */
		/* Check whether the instruction modifies the Interrupt Flag or not */
		p->ainsn.if_modifier = 1;
		break;
	case 0x9c:		/* pushfl */
		p->ainsn.is_pushf = 1;
		break;
	case 0xcf:		/* iret */
		p->ainsn.if_modifier = 1;
		fallthrough;
	case 0xc2:		/* ret/lret */
	case 0xc3:
	case 0xca:
	case 0xcb:
	case 0xea:		/* jmp absolute -- ip is correct */
		/* ip is already adjusted, no more changes required */
		p->ainsn.is_abs_ip = 1;
		/* Without a resume jump, this is boostable */
		p->ainsn.boostable = 1;
		break;
	case 0xe8:		/* call relative - Fix return addr */
		p->ainsn.is_call = 1;
		break;
#ifdef CONFIG_X86_32
	case 0x9a:		/* call absolute -- same as call absolute, indirect */
		p->ainsn.is_call = 1;
		p->ainsn.is_abs_ip = 1;
		break;
#endif
	case 0xff:
		opcode = insn->opcode.bytes[1];
		if ((opcode & 0x30) == 0x10) {
			/*
			 * call absolute, indirect
			 * Fix return addr; ip is correct.
			 * But this is not boostable
			 */
			p->ainsn.is_call = 1;
			p->ainsn.is_abs_ip = 1;
			break;
		} else if (((opcode & 0x31) == 0x20) ||
			   ((opcode & 0x31) == 0x21)) {
			/*
			 * jmp near and far, absolute indirect
			 * ip is correct.
			 */
			p->ainsn.is_abs_ip = 1;
			/* Without a resume jump, this is boostable */
			p->ainsn.boostable = 1;
		}
		break;
	}
}
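/*
 * Illustrative note (not part of the original sources): these flags drive
 * the post-single-step fixups in resume_execution().  For example, pushf
 * (0x9c) sets is_pushf so the TF/IF bits pushed on the stack while
 * single-stepping can be replaced with the flags the probed context
 * actually had, and call rel32 (0xe8) sets is_call so the return address
 * pushed while executing the copy is rewritten to point just past the
 * original instruction.
 */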
static int arch_copy_kprobe(struct kprobe *p)
{
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];
	int len;

	/* Copy an instruction, recovering it if another optprobe modified it. */
	len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
	if (!len)
		return -EINVAL;

	/*
	 * __copy_instruction can modify the displacement of the instruction,
	 * but it doesn't affect the boostable check.
	 */
	len = prepare_boost(buf, p, &insn);

	/* Analyze the opcode and set resume flags */
	set_resume_flags(p, &insn);

	/* Also, the displacement change doesn't affect the first byte */
	p->opcode = buf[0];

	p->ainsn.tp_len = len;
	perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);

	/* OK, write back the instruction(s) into the ROX insn buffer */
	text_poke(p->ainsn.insn, buf, len);

	return 0;
}

int arch_prepare_kprobe(struct kprobe *p)
{
	int ret;

	if (alternatives_text_reserved(p->addr, p->addr))
		return -EINVAL;

	if (!can_probe((unsigned long)p->addr))
		return -EILSEQ;

	memset(&p->ainsn, 0, sizeof(p->ainsn));

	/* insn: must be on a special executable page on x86. */
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;

	ret = arch_copy_kprobe(p);
	if (ret) {
		free_insn_slot(p->ainsn.insn, 0);
		p->ainsn.insn = NULL;
	}

	return ret;
}

void arch_arm_kprobe(struct kprobe *p)
{
	u8 int3 = INT3_INSN_OPCODE;

	text_poke(p->addr, &int3, 1);
	text_poke_sync();
	perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
}

void arch_disarm_kprobe(struct kprobe *p)
{
	u8 int3 = INT3_INSN_OPCODE;

	perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
	text_poke(p->addr, &p->opcode, 1);
	text_poke_sync();
}

void arch_remove_kprobe(struct kprobe *p)
{
	if (p->ainsn.insn) {
		/* Record the perf event before freeing the slot */
		perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
				     p->ainsn.tp_len, NULL, 0);
		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
		p->ainsn.insn = NULL;
	}
}
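/*
 * Illustrative note (not part of the original sources): the arch_*_kprobe()
 * callbacks above are driven by the generic register_kprobe() path.  A
 * minimal user, modeled on samples/kprobes/kprobe_example.c (the symbol
 * name and handler body below are placeholders), looks like this:
 *
 *	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		pr_info("%s hit at %pS\n", p->symbol_name, (void *)regs->ip);
 *		return 0;	// 0: continue with single-stepping
 *	}
 *
 *	static struct kprobe kp = {
 *		.symbol_name	= "kernel_clone",
 *		.pre_handler	= handler_pre,
 *	};
 *
 *	// module init: register_kprobe(&kp);
 *	// module exit: unregister_kprobe(&kp);
 *
 * register_kprobe() ends up calling arch_prepare_kprobe() to copy the
 * probed instruction into an insn slot and arch_arm_kprobe() to patch in
 * the int3.
 */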
static nokprobe_inline void
save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->prev_kprobe.kp = kprobe_running();
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
	kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
}

static nokprobe_inline void
restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
	kcb->kprobe_status = kcb->prev_kprobe.status;
	kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
	kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
}

static nokprobe_inline void
set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
		   struct kprobe_ctlblk *kcb)
{
	__this_cpu_write(current_kprobe, p);
	kcb->kprobe_saved_flags = kcb->kprobe_old_flags
		= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	if (p->ainsn.if_modifier)
		kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
}

static nokprobe_inline void clear_btf(void)
{
	if (test_thread_flag(TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl &= ~DEBUGCTLMSR_BTF;
		update_debugctlmsr(debugctl);
	}
}

static nokprobe_inline void restore_btf(void)
{
	if (test_thread_flag(TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl |= DEBUGCTLMSR_BTF;
		update_debugctlmsr(debugctl);
	}
}

void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	unsigned long *sara = stack_addr(regs);

	ri->ret_addr = (kprobe_opcode_t *) *sara;
	ri->fp = sara;

	/* Replace the return addr with trampoline addr */
	*sara = (unsigned long) &kretprobe_trampoline;
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);

static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
			     struct kprobe_ctlblk *kcb, int reenter)
{
	if (setup_detour_execution(p, regs, reenter))
		return;

#if !defined(CONFIG_PREEMPTION)
	if (p->ainsn.boostable && !p->post_handler) {
		/* Boost up -- we can execute the copied instructions directly */
		if (!reenter)
			reset_current_kprobe();
		/*
		 * Reentering a boosted probe doesn't reset current_kprobe,
		 * nor set current_kprobe, because it doesn't use single
		 * stepping.
		 */
		regs->ip = (unsigned long)p->ainsn.insn;
		return;
	}
#endif
	if (reenter) {
		save_previous_kprobe(kcb);
		set_current_kprobe(p, regs, kcb);
		kcb->kprobe_status = KPROBE_REENTER;
	} else
		kcb->kprobe_status = KPROBE_HIT_SS;
	/* Prepare real single stepping */
	clear_btf();
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
	/* single step inline if the instruction is an int3 */
	if (p->opcode == INT3_INSN_OPCODE)
		regs->ip = (unsigned long)p->addr;
	else
		regs->ip = (unsigned long)p->ainsn.insn;
}
NOKPROBE_SYMBOL(setup_singlestep);

/*
 * We have reentered the kprobe_handler(), since another probe was hit while
 * within the handler. We save the original kprobes variables and just single
 * step on the instruction of the new probe without calling any user handlers.
 */
static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
			  struct kprobe_ctlblk *kcb)
{
	switch (kcb->kprobe_status) {
	case KPROBE_HIT_SSDONE:
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SS:
		kprobes_inc_nmissed_count(p);
		setup_singlestep(p, regs, kcb, 1);
		break;
	case KPROBE_REENTER:
		/*
		 * A probe has been hit in the codepath leading up to, or just
		 * after, single-stepping of a probed instruction. This entire
		 * codepath should strictly reside in the .kprobes.text section.
		 * Raise a BUG or we'll continue in an endless reentering loop
		 * and eventually a stack overflow.
		 */
		pr_err("Unrecoverable kprobe detected.\n");
		dump_kprobe(p);
		BUG();
	default:
		/* impossible cases */
		WARN_ON(1);
		return 0;
	}

	return 1;
}
NOKPROBE_SYMBOL(reenter_kprobe);
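/*
 * Illustrative note (not part of the original sources): for an ordinary,
 * non-optimized probe the handlers below cooperate roughly like this:
 * hitting the int3 at p->addr enters kprobe_int3_handler(), which runs the
 * pre_handler and calls setup_singlestep() to point regs->ip at the copied
 * instruction with TF set (or, if the probe is boostable and has no
 * post_handler, simply jumps to the copy followed by its resume jump).
 * The resulting debug trap lands in kprobe_debug_handler(), which calls
 * resume_execution() to fix up ip/stack and then runs the post_handler.
 */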
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled throughout this function.
 */
int kprobe_int3_handler(struct pt_regs *regs)
{
	kprobe_opcode_t *addr;
	struct kprobe *p;
	struct kprobe_ctlblk *kcb;

	if (user_mode(regs))
		return 0;

	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
	/*
	 * We don't want to be preempted for the entire duration of kprobe
	 * processing. Since int3 and the debug trap disable irqs and we clear
	 * IF while single-stepping, this must not be preemptible.
	 */

	kcb = get_kprobe_ctlblk();
	p = get_kprobe(addr);

	if (p) {
		if (kprobe_running()) {
			if (reenter_kprobe(p, regs, kcb))
				return 1;
		} else {
			set_current_kprobe(p, regs, kcb);
			kcb->kprobe_status = KPROBE_HIT_ACTIVE;

			/*
			 * If we have no pre-handler or it returned 0, we
			 * continue with normal processing.  If we have a
			 * pre-handler and it returned non-zero, that means
			 * the user handler set up registers to exit to another
			 * instruction, so we must skip the single stepping.
			 */
			if (!p->pre_handler || !p->pre_handler(p, regs))
				setup_singlestep(p, regs, kcb, 0);
			else
				reset_current_kprobe();
			return 1;
		}
	} else if (*addr != INT3_INSN_OPCODE) {
		/*
		 * The breakpoint instruction was removed right
		 * after we hit it. Another cpu has removed
		 * either a probepoint or a debugger breakpoint
		 * at this address. In either case, no further
		 * handling of this interrupt is appropriate.
		 * Back up over the (now missing) int3 and run
		 * the original instruction.
		 */
		regs->ip = (unsigned long)addr;
		return 1;
	} /* else: not a kprobe fault; let the kernel handle it */

	return 0;
}
NOKPROBE_SYMBOL(kprobe_int3_handler);

/*
 * When a retprobed function returns, this code saves registers and
 * calls trampoline_handler(), which in turn calls the kretprobe's handler.
 */
asm(
	".text\n"
	".global kretprobe_trampoline\n"
	".type kretprobe_trampoline, @function\n"
	"kretprobe_trampoline:\n"
	/* We don't bother saving the ss register */
#ifdef CONFIG_X86_64
	"	pushq %rsp\n"
	"	pushfq\n"
	SAVE_REGS_STRING
	"	movq %rsp, %rdi\n"
	"	call trampoline_handler\n"
	/* Replace saved sp with true return address. */
	"	movq %rax, 19*8(%rsp)\n"
	RESTORE_REGS_STRING
	"	popfq\n"
#else
	"	pushl %esp\n"
	"	pushfl\n"
	SAVE_REGS_STRING
	"	movl %esp, %eax\n"
	"	call trampoline_handler\n"
	/* Replace saved sp with true return address. */
	"	movl %eax, 15*4(%esp)\n"
	RESTORE_REGS_STRING
	"	popfl\n"
#endif
	"	ret\n"
	".size kretprobe_trampoline, .-kretprobe_trampoline\n"
);
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);

/*
 * Called from kretprobe_trampoline
 */
__used __visible void *trampoline_handler(struct pt_regs *regs)
{
	/* fixup registers */
	regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
	regs->gs = 0;
#endif
	regs->ip = (unsigned long)&kretprobe_trampoline;
	regs->orig_ax = ~0UL;

	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, &regs->sp);
}
NOKPROBE_SYMBOL(trampoline_handler);
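/*
 * Illustrative note (not part of the original sources):
 * arch_prepare_kretprobe() replaces the return address on the stack with
 * &kretprobe_trampoline, so the probed function "returns" into the assembly
 * above.  The trampoline builds a pt_regs frame, and
 * kretprobe_trampoline_handler() hands back the original return address it
 * stashed earlier; the "movq %rax, 19*8(%rsp)" writes it over the slot that
 * "pushq %rsp" created, so after the registers and flags are popped the
 * final "ret" transfers control back to the real caller.
 */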
/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new ip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed flags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
 */
static void resume_execution(struct kprobe *p, struct pt_regs *regs,
			     struct kprobe_ctlblk *kcb)
{
	unsigned long *tos = stack_addr(regs);
	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
	unsigned long orig_ip = (unsigned long)p->addr;

	regs->flags &= ~X86_EFLAGS_TF;

	/* Fixup the contents of the top of stack */
	if (p->ainsn.is_pushf) {
		*tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
		*tos |= kcb->kprobe_old_flags;
	} else if (p->ainsn.is_call) {
		*tos = orig_ip + (*tos - copy_ip);
	}

	if (!p->ainsn.is_abs_ip)
		regs->ip += orig_ip - copy_ip;

	restore_btf();
}
NOKPROBE_SYMBOL(resume_execution);
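/*
 * Illustrative note (not part of the original sources): suppose a 5-byte
 * "call rel32" was probed at orig_ip.  Single-stepping the copy pushes
 * copy_ip + 5 as the return address and leaves regs->ip at
 * copy_ip + 5 + rel32.  The is_call fixup above turns the return address
 * into orig_ip + 5, and adding (orig_ip - copy_ip) to regs->ip yields
 * orig_ip + 5 + rel32, i.e. the target the original call would have
 * reached.
 */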
/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
 * remain disabled throughout this function.
 */
int kprobe_debug_handler(struct pt_regs *regs)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
		return 0;

	resume_execution(cur, regs, kcb);
	regs->flags |= kcb->kprobe_saved_flags;

	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
	}

	/* Restore the original saved kprobes variables and continue. */
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
		goto out;
	}
	reset_current_kprobe();
out:
	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (regs->flags & X86_EFLAGS_TF)
		return 0;

	return 1;
}
NOKPROBE_SYMBOL(kprobe_debug_handler);

int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) {
		/* This must happen on single-stepping */
		WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
			kcb->kprobe_status != KPROBE_REENTER);
		/*
		 * We are here because the instruction being single-
		 * stepped caused a page fault. We reset the current
		 * kprobe so that the ip points back to the probe address
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->ip = (unsigned long)cur->addr;
		/*
		 * The trap flag (TF) has been set here because this fault
		 * happened where the single stepping will be done.
		 * So clear it by resetting the current kprobe:
		 */
		regs->flags &= ~X86_EFLAGS_TF;
		/*
		 * Since the single step (trap) has been cancelled,
		 * we need to restore BTF here.
		 */
		restore_btf();

		/*
		 * If the TF flag was set before the kprobe hit,
		 * don't touch it:
		 */
		regs->flags |= kcb->kprobe_old_flags;

		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
	} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
		   kcb->kprobe_status == KPROBE_HIT_SSDONE) {
		/*
		 * We increment the nmissed count for accounting;
		 * we can also use the npre/npostfault counts for accounting
		 * these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page fault; this could happen
		 * if the handler tries to access user space via
		 * copy_from_user(), get_user() etc. Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;
	}

	return 0;
}
NOKPROBE_SYMBOL(kprobe_fault_handler);

int __init arch_populate_kprobe_blacklist(void)
{
	return kprobe_add_area_blacklist((unsigned long)__entry_text_start,
					 (unsigned long)__entry_text_end);
}

int __init arch_init_kprobes(void)
{
	return 0;
}

int arch_trampoline_kprobe(struct kprobe *p)
{
	return 0;
}