/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8 = 0xe8;
	calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}
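/*
 * ftrace_call_replace() builds the 5-byte x86 near call: opcode 0xe8
 * followed by a 32-bit displacement relative to the *next* instruction
 * (ip + MCOUNT_INSN_SIZE). For example, a call whose target lies 0x100
 * bytes past the end of the instruction encodes as (illustrative value,
 * not a real address):
 *
 *	e8 00 01 00 00		call	+0x100
 */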
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, const void *new_code)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
			  unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can disappear and
	 * change, so we need to protect against faulting as well as
	 * code changing. We do this by using the probe_kernel_*
	 * functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. Nor can a CPU read
 * modifying_ftrace_code before it has executed the breakpoint
 * that triggers the read; it would be quite remarkable if it
 * could do that. Here's the flow that is required:
 *
 *	CPU-0				CPU-1
 *
 *	atomic_inc(mfc);
 *	write int3s
 *					<trap-int3> // implicit (r)mb
 *					if (atomic_read(mfc))
 *						call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 *	atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that a ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 *  which is called to turn mcount into nops or nops into function calls
 *  but not to convert a function from not using regs to one that uses
 *  regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	WARN_ON(1);
	return -EINVAL;
}
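/*
 * The helpers below patch live kernel text using a breakpoint-based
 * sequence, roughly (a sketch of the flow implemented further down):
 *
 *	1) add_break():       replace byte 0 with int3 (0xcc), sync cores
 *	2) add_update_code(): write the new bytes 1..4, sync cores
 *	3) ftrace_write():    write the new first byte over the int3, sync
 *
 * Any CPU that executes the instruction mid-update hits the int3,
 * and ftrace_int3_handler() simply skips over the whole instruction.
 */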
int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	/* Also update the regs callback function */
	if (!ret) {
		ip = (unsigned long)(&ftrace_regs_call);
		memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
		new = ftrace_call_replace(ip, (unsigned long)func);
		ret = ftrace_modify_code(ip, old, new);
	}

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
int ftrace_int3_handler(struct pt_regs *regs)
{
	if (WARN_ON_ONCE(!regs))
		return 0;

	if (!ftrace_location(regs->ip - 1))
		return 0;

	regs->ip += MCOUNT_INSN_SIZE - 1;

	return 1;
}

static int ftrace_write(unsigned long ip, const char *val, int size)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	return probe_kernel_write((void *)ip, val, size);
}

static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	if (ftrace_write(ip, &brk, 1))
		return -EPERM;

	return 0;
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}

static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

/*
 * If the record has FTRACE_FL_REGS set, that means that it
 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
 * is not set, then it wants to convert to the normal callback.
 */
static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
{
	if (rec->flags & FTRACE_FL_REGS)
		return (unsigned long)FTRACE_REGS_ADDR;
	else
		return (unsigned long)FTRACE_ADDR;
}

/*
 * FTRACE_FL_REGS_EN is set when the record already points to
 * a function that saves all the regs. Basically the '_EN' version
 * represents the current state of the function.
 */
static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
{
	if (rec->flags & FTRACE_FL_REGS_EN)
		return (unsigned long)FTRACE_REGS_ADDR;
	else
		return (unsigned long)FTRACE_ADDR;
}
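/*
 * add_breakpoints() below is pass one of the batch update: for every
 * record whose state must change, the first byte of its mcount site is
 * replaced with int3. What we expect to find there depends on the
 * transition (an illustrative summary of the cases handled below):
 *
 *	MAKE_CALL:		site is currently a nop
 *	MAKE_NOP:		site currently calls ftrace_addr
 *	MODIFY_CALL[_REGS]:	site currently calls the *old* ftrace_addr
 */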
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = get_ftrace_addr(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL_REGS:
	case FTRACE_UPDATE_MODIFY_CALL:
		ftrace_addr = get_ftrace_old_addr(rec);
		/* fall through */
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If it matches a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return -1;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * For extra paranoia, we check if the breakpoint is on
		 * a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint, we may just create
		 * a disaster.
		 */
		ftrace_addr = get_ftrace_addr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = get_ftrace_old_addr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	return probe_kernel_write((void *)ip, &nop[0], 1);
}
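/*
 * Pass two of the update: with every affected site parked behind an
 * int3, it is now safe to rewrite the rest of the instruction. Only
 * bytes 1..4 are written here; byte 0 still holds the breakpoint, so
 * no CPU can observe a half-written instruction.
 */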
static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
		return -EPERM;
	return 0;
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = get_ftrace_addr(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL_REGS:
	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	if (ftrace_write(ip, new, 1))
		return -EPERM;

	return 0;
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	if (ftrace_write(ip, new, 1))
		return -EPERM;
	return 0;
}

static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = get_ftrace_addr(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL_REGS:
	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}

static void do_sync_core(void *data)
{
	sync_core();
}

static void run_sync(void)
{
	int enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup) */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}
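/*
 * ftrace_replace_code() drives the whole batch conversion in three
 * passes over every record, with a sync_core() on all CPUs after
 * each one, roughly:
 *
 *	add_breakpoints()  ->  run_sync()
 *	add_update()       ->  run_sync()
 *	finish_update()    ->  run_sync()
 *
 * If any pass fails, all breakpoints placed so far are backed out
 * via remove_breakpoint().
 */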
void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	report = "removing breakpoints";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	return;

 remove_breakpoints:
	ftrace_bug(ret, rec ? rec->ip : 0);
	printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		remove_breakpoint(rec);
	}
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	if (ret) {
		ret = -EPERM;
		goto out;
	}
	run_sync();
 out:
	return ret;

 fail_update:
	probe_kernel_write((void *)ip, &old_code[0], 1);
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void *data)
{
	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
		return -EINVAL;

	*(int *)(&code[1]) = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */
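/*
 * A rough picture of what prepare_ftrace_return() below does to the
 * stack of the traced function (parent points at the saved return
 * address):
 *
 *	before:	[parent] = return address in the caller
 *	after:	[parent] = return_to_handler
 *
 * The real return address is saved on a per-task stack by
 * ftrace_push_return_trace(), and return_to_handler restores it
 * once the function's exit has been traced.
 */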
/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		*parent = old;
		return;
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */