// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/execmem.h>

#include <trace/syscall.h>

#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/text-patching.h>

#ifdef CONFIG_DYNAMIC_FTRACE

static int ftrace_poke_late = 0;

void ftrace_arch_code_modify_prepare(void)
    __acquires(&text_mutex)
{
	/*
	 * Need to grab text_mutex to prevent a race from module loading
	 * and live kernel patching from changing the text permissions while
	 * ftrace has it set to "read/write".
	 */
	mutex_lock(&text_mutex);
	ftrace_poke_late = 1;
}

void ftrace_arch_code_modify_post_process(void)
    __releases(&text_mutex)
{
	/*
	 * ftrace_make_{call,nop}() may be called during
	 * module load, and we need to finish the text_poke_queue()
	 * that they do, here.
	 */
	text_poke_finish();
	ftrace_poke_late = 0;
	mutex_unlock(&text_mutex);
}

static const char *ftrace_nop_replace(void)
{
	return x86_nops[5];
}

static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting itself.
	 */
	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}

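/*
 * What these two helpers produce for a patched fentry site, where
 * MCOUNT_INSN_SIZE is 5 bytes on x86, e.g.:
 *
 *	e8 xx xx xx xx		call <ftrace trampoline>	(tracing enabled)
 *	0f 1f 44 00 00		5-byte NOP (x86_nops[5])	(tracing disabled)
 */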

static int ftrace_verify_code(unsigned long ip, const char *old_code)
{
	char cur_code[MCOUNT_INSN_SIZE];

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug was to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read and modify the code with copy_from_kernel_nofault(),
	 * and make sure what we read is what we expected it to be before
	 * modifying it.
	 */
	/* read the text we want to modify */
	if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	/* Make sure it is what we expect it to be */
	if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
		ftrace_expected = old_code;
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

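/*
 * Once ftrace_arch_code_modify_prepare() has set ftrace_poke_late, the
 * modifications below are queued with text_poke_queue() and flushed in one
 * batch by text_poke_finish() in ftrace_arch_code_modify_post_process().
 * Earlier than that (boot, module init), the target code is not executing
 * yet, so it can simply be written in place with text_poke_early().
 */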

/*
 * Marked __ref because it calls text_poke_early() which is .init.text. That is
 * ok because that call will happen early, during boot, when .init sections are
 * still present.
 */
static int __ref
ftrace_modify_code_direct(unsigned long ip, const char *old_code,
			  const char *new_code)
{
	int ret = ftrace_verify_code(ip, old_code);
	if (ret)
		return ret;

	/* replace the text with the new text */
	if (ftrace_poke_late)
		text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
	else
		text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
	return 0;
}

int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	const char *new, *old;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(ip, old, new);

	/*
	 * x86 overrides ftrace_replace_code -- this function will never be used
	 * in this case.
	 */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	const char *new, *old;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 *  which is called to turn mcount into nops or nops into function calls
 *  but not to convert a function from not using regs to one that uses
 *  regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	WARN_ON(1);
	return -EINVAL;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip;
	const char *new;

	ip = (unsigned long)(&ftrace_call);
	new = ftrace_call_replace(ip, (unsigned long)func);
	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

	ip = (unsigned long)(&ftrace_regs_call);
	new = ftrace_call_replace(ip, (unsigned long)func);
	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

	return 0;
}

void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *new, *old;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		switch (ftrace_test_record(rec, enable)) {
		case FTRACE_UPDATE_IGNORE:
		default:
			continue;

		case FTRACE_UPDATE_MAKE_CALL:
			old = ftrace_nop_replace();
			break;

		case FTRACE_UPDATE_MODIFY_CALL:
		case FTRACE_UPDATE_MAKE_NOP:
			old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));
			break;
		}

		ret = ftrace_verify_code(rec->ip, old);
		if (ret) {
			ftrace_expected = old;
			ftrace_bug(ret, rec);
			ftrace_expected = NULL;
			return;
		}
	}

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		switch (ftrace_test_record(rec, enable)) {
		case FTRACE_UPDATE_IGNORE:
		default:
			continue;

		case FTRACE_UPDATE_MAKE_CALL:
		case FTRACE_UPDATE_MODIFY_CALL:
			new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));
			break;

		case FTRACE_UPDATE_MAKE_NOP:
			new = ftrace_nop_replace();
			break;
		}

		text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
		ftrace_update_record(rec, enable);
	}
	text_poke_finish();
}

void arch_ftrace_update_code(int command)
{
	ftrace_modify_all_code(command);
}

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

static inline void *alloc_tramp(unsigned long size)
{
	return execmem_alloc(EXECMEM_FTRACE, size);
}
static inline void tramp_free(void *tramp)
{
	execmem_free(tramp);
}

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_caller_end(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
extern void ftrace_regs_caller_jmp(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-ftrace_trace_op (4 bytes)> */
#define OP_REF_SIZE	7

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};

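/*
 * Size reserved for the return sequence that create_trampoline() appends
 * after the copied caller code: a 5-byte jmp to the return thunk when
 * retpolines are enabled, otherwise a 1-byte ret plus an int3 when the
 * SLS mitigation is on.
 */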
#define RET_SIZE \
	(IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_MITIGATION_SLS))

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long call_offset;
	unsigned long jmp_offset;
	unsigned long offset;
	unsigned long npages;
	unsigned long size;
	unsigned long *ptr;
	void *trampoline;
	void *ip, *dest;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
		call_offset = (unsigned long)ftrace_regs_call;
		jmp_offset = (unsigned long)ftrace_regs_caller_jmp;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_caller_end;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
		call_offset = (unsigned long)ftrace_call;
		jmp_offset = 0;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough size to store the ftrace_caller code,
	 * the iret, as well as the address of the ftrace_ops this
	 * trampoline is used for.
	 */
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);
	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

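	/*
	 * Rough layout of the allocation (derived from the sizes above):
	 *
	 *   trampoline                       copy of ftrace_[regs_]caller
	 *   trampoline + size                return (jmp to return thunk, or ret)
	 *   trampoline + size + RET_SIZE     pointer to this trampoline's ftrace_ops
	 */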

	/* Copy ftrace_caller onto the trampoline memory */
	ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;
	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
		__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
	else
		memcpy(ip, retq, sizeof(retq));

	/* No need to test direct calls on created trampolines */
	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		/* NOP the jnz 1f; but make sure it's a 2 byte jnz */
		ip = trampoline + (jmp_offset - start_offset);
		if (WARN_ON(*(char *)ip != 0x75))
			goto fail;
		ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
		if (ret < 0)
			goto fail;
	}

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* put in the call to the function */
	mutex_lock(&text_mutex);
	call_offset -= start_offset;
	/*
	 * No need to translate into a callthunk. The trampoline does
	 * the depth accounting before the call already.
	 */
	dest = ftrace_ops_get_func(ops);
	memcpy(trampoline + call_offset,
	       text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
	       CALL_INSN_SIZE);
	mutex_unlock(&text_mutex);

	/* The ALLOC_TRAMP flag lets us know we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	set_memory_rox((unsigned long)trampoline, npages);
	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline);
	return 0;
}

void set_ftrace_ops_ro(void)
{
	struct ftrace_ops *ops;
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long npages;
	unsigned long size;

	do_for_each_ftrace_op(ops, ftrace_ops_list) {
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			continue;

		if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
			start_offset = (unsigned long)ftrace_regs_caller;
			end_offset = (unsigned long)ftrace_regs_caller_end;
		} else {
			start_offset = (unsigned long)ftrace_caller;
			end_offset = (unsigned long)ftrace_caller_end;
		}
		size = end_offset - start_offset;
		size = size + RET_SIZE + sizeof(void *);
		npages = DIV_ROUND_UP(size, PAGE_SIZE);
		set_memory_ro((unsigned long)ops->trampoline, npages);
	} while_for_each_ftrace_op(ops);
}

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	const char *new;

	if (!ops->trampoline) {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		return;
	}

	/*
	 * The ftrace_ops caller may set up its own trampoline.
	 * In such a case, this code must not modify it.
	 */
	if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;
	func = ftrace_ops_get_func(ops);

	mutex_lock(&text_mutex);
	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
	mutex_unlock(&text_mutex);
}

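/*
 * The call emitted at the trampoline's call site is a relative E8
 * (CALL_INSN_OPCODE) call whose 32-bit displacement is measured from the
 * end of the instruction, so addr_from_call() below recovers the callee
 * as ptr + CALL_INSN_SIZE + disp.
 */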
/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union text_poke_insn call;
	int ret;

	ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {
		pr_warn("Expected E8, got %x\n", call.opcode);
		return NULL;
	}

	return ptr + CALL_INSN_SIZE + call.disp;
}

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) && \
	defined(CONFIG_FUNCTION_GRAPH_TRACER)
		/*
		 * We only know about function graph tracer setting as static
		 * trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

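/*
 * Without DYNAMIC_FTRACE_WITH_ARGS, the function graph tracer is wired up
 * by live-patching the jmp at the ftrace_graph_call site inside the ftrace
 * caller: ftrace_enable_ftrace_graph_caller() points it at
 * ftrace_graph_caller, and ftrace_disable_ftrace_graph_caller() points it
 * back at ftrace_stub.
 */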
#if defined(CONFIG_DYNAMIC_FTRACE) && !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
extern void ftrace_graph_call(void);
static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
}

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	const char *new;

	new = ftrace_jmp_replace(ip, (unsigned long)func);
	text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}
#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */

/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long return_hooker = (unsigned long)&return_to_handler;
	int bit;

	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably. Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if (unlikely((long)__builtin_frame_address(0) >= 0))
		return;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	bit = ftrace_test_recursion_trylock(ip, *parent);
	if (bit < 0)
		return;

	if (!function_graph_enter(*parent, ip, frame_pointer, parent))
		*parent = return_hooker;

	ftrace_test_recursion_unlock(bit);
}

#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
		       struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	struct pt_regs *regs = &fregs->regs;
	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);

	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
}
#endif

#endif /* CONFIG_FUNCTION_GRAPH_TRACER */