/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8 = 0xe8;
	calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}
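
/*
 * Worked example with made-up addresses: for a call site at
 * ip = 0xffffffff81000010 that should call addr = 0xffffffff81000100,
 * ftrace_call_replace() above produces the five bytes
 *
 *	e8 eb 00 00 00
 *
 * since the rel32 operand of a near call is relative to the end of
 * the instruction: addr - (ip + MCOUNT_INSN_SIZE) = 0xeb.
 */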

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, const void *new_code)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping
	 * instead of the kernel text mapping to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
			  unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can disappear and
	 * change; we need to protect against faulting as well as code
	 * changing. We do this by using the probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}
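
/*
 * The code below implements breakpoint based live patching, used once
 * other CPUs are running (the direct method above is only safe before
 * SMP starts, or for module code that has never been executed).
 * Informal sketch of the states a 5-byte site moves through
 * (xx = old tail bytes, yy = new tail bytes):
 *
 *	e8 xx xx xx xx		old instruction (call or 5-byte nop)
 *	cc xx xx xx xx		add_break(): int3 on the first byte
 *	cc yy yy yy yy		add_update_code(): new tail written
 *	.. yy yy yy yy		finish_update_*(): new first byte restored
 *
 * All CPUs are synced between each state so none can execute a torn
 * mix of old and new bytes, and a CPU that hits the int3 meanwhile is
 * skipped over the whole site by ftrace_int3_handler().
 */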

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. It is also assumed
 * that a CPU cannot read modifying_ftrace_code before it
 * executes the breakpoint; it would be quite remarkable if
 * it could. Here's the flow that is required:
 *
 *   CPU-0                          CPU-1
 *
 *   atomic_inc(mfc);
 *   write int3s
 *				<trap-int3> // implicit (r)mb
 *				if (atomic_read(mfc))
 *					call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 *   atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that a ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
int ftrace_int3_handler(struct pt_regs *regs)
{
	if (WARN_ON_ONCE(!regs))
		return 0;

	if (!ftrace_location(regs->ip - 1))
		return 0;

	regs->ip += MCOUNT_INSN_SIZE - 1;

	return 1;
}
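
/*
 * Worked example for the skip above (made-up address): with the int3
 * byte at 0xffffffff81000010, the trap leaves regs->ip one past the
 * breakpoint, at 0xffffffff81000011 (hence the ftrace_location()
 * lookup on regs->ip - 1). Adding MCOUNT_INSN_SIZE - 1 = 4 advances
 * it to 0xffffffff81000015, the next instruction, so the whole site
 * is emulated as a nop.
 */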

static int ftrace_write(unsigned long ip, const char *val, int size)
{
	/*
	 * See the comment in do_ftrace_mod_code(): with
	 * CONFIG_DEBUG_RODATA the kernel text mapping may be read-only,
	 * so write through the kernel identity mapping instead.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	return probe_kernel_write((void *)ip, val, size);
}

static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	if (ftrace_write(ip, &brk, 1))
		return -EPERM;

	return 0;
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}

static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = (unsigned long)FTRACE_ADDR;

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If they match a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return -1;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * To be extra paranoid, check whether the breakpoint
		 * sits on a call that really would jump to ftrace_addr.
		 * If not, don't touch the breakpoint; we might just
		 * create a disaster.
		 */
		ftrace_addr = (unsigned long)FTRACE_ADDR;
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

	return probe_kernel_write((void *)ip, &nop[0], 1);
}

static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
		return -EPERM;
	return 0;
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = (unsigned long)FTRACE_ADDR;

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	if (ftrace_write(ip, new, 1))
		return -EPERM;

	return 0;
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	if (ftrace_write(ip, new, 1))
		return -EPERM;
	return 0;
}

static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = (unsigned long)FTRACE_ADDR;

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}
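
/*
 * Cross-modifying code requires each CPU to execute a serializing
 * instruction before it can safely execute the rewritten bytes.
 * run_sync() below IPIs every CPU to run sync_core(), and is invoked
 * after each phase: once the breakpoints are in, once the tails are
 * updated, and once the first bytes are restored.
 */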

static void do_sync_core(void *data)
{
	sync_core();
}

static void run_sync(void)
{
	int enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup). */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}

void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	report = "removing breakpoints";

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
	}

	run_sync();

	return;

 remove_breakpoints:
	ftrace_bug(ret, rec ? rec->ip : 0);
	printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		remove_breakpoint(rec);
	}
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	if (ret) {
		ret = -EPERM;
		goto out;
	}
	run_sync();
 out:
	return ret;

 fail_update:
	probe_kernel_write((void *)ip, &old_code[0], 1);
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void *data)
{
	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
		return -EINVAL;

	*(int *)(&code[1]) = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}
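
/*
 * The ftrace_graph_call site patched below holds a 5-byte near jmp
 * (opcode 0xe9), which is why ftrace_mod_jmp() above insists on
 * code[0] == 0xe9. Enabling or disabling the graph caller merely
 * swaps the jmp's rel32 operand between ftrace_stub and
 * ftrace_graph_caller.
 */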

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it onto the stack of return
 * addresses in the current thread_info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against faulting, even if it shouldn't
	 * happen. This tool is too intrusive to forgo such
	 * protection.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		*parent = old;
		return;
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */