/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>


#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * modifying_code is set to notify NMIs that they need to use
 * memory barriers when entering or exiting. But we don't want
 * to burden NMIs with unnecessary memory barriers when code
 * modification is not being done (which is most of the time).
 *
 * A mutex is already held when ftrace_arch_code_modify_prepare
 * and post_process are called. No locks need to be taken here.
 *
 * Stop machine will make sure currently running NMIs are done
 * and new NMIs will see the updated variable before we need
 * to worry about NMIs doing memory barriers.
 */
static int modifying_code __read_mostly;
static DEFINE_PER_CPU(int, save_modifying_code);

int ftrace_arch_code_modify_prepare(void)
{
        set_kernel_text_rw();
        modifying_code = 1;
        return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
        modifying_code = 0;
        set_kernel_text_ro();
        return 0;
}

union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
        struct {
                char e8;
                int offset;
        } __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
        return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
        static union ftrace_code_union calc;

        calc.e8 = 0xe8;
        calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

        /*
         * No locking needed, this must be called via kstop_machine
         * which in essence is like running on a uniprocessor machine.
         */
        return calc.code;
}
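
/*
 * A worked example of what ftrace_call_replace() produces (the
 * addresses below are made up; only the arithmetic comes from
 * ftrace_calc_offset() above): with a call site at
 * ip = 0xffffffff81000100 and a target at addr = 0xffffffff81000200,
 * the stored displacement is addr - (ip + MCOUNT_INSN_SIZE) =
 * 0x200 - 0x105 = 0xfb, so the five bytes written back are
 * e8 fb 00 00 00, i.e. a near call with a rel32 operand relative to
 * the end of the instruction.
 */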

/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU,
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done in an atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code,
 * it is OK to write to that code location if the contents being
 * written are the same as what exists.
 */

#define MOD_CODE_WRITE_FLAG (1 << 31)   /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;             /* holds return value of text write */
static void *mod_code_ip;               /* holds the IP to write to */
static void *mod_code_newcode;          /* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
        int r;

        r = snprintf(buf, size, "%u %u",
                     nmi_wait_count,
                     atomic_read(&nmi_update_count));
        return r;
}

/* Atomically clear the write flag while preserving the NMI count */
static void clear_mod_flag(void)
{
        int old = atomic_read(&nmi_running);

        for (;;) {
                int new = old & ~MOD_CODE_WRITE_FLAG;

                if (old == new)
                        break;

                old = atomic_cmpxchg(&nmi_running, old, new);
        }
}

static void ftrace_mod_code(void)
{
        /*
         * Yes, more than one CPU can be writing to mod_code_status
         * (and the code itself). But if one were to fail, then they
         * all should, and if one were to succeed, then they all should.
         */
        mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
                                             MCOUNT_INSN_SIZE);

        /* if we fail, then kill any new writers */
        if (mod_code_status)
                clear_mod_flag();
}

void ftrace_nmi_enter(void)
{
        __get_cpu_var(save_modifying_code) = modifying_code;

        if (!__get_cpu_var(save_modifying_code))
                return;

        if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
                smp_rmb();
                ftrace_mod_code();
                atomic_inc(&nmi_update_count);
        }
        /* Must have previous changes seen before executions */
        smp_mb();
}

void ftrace_nmi_exit(void)
{
        if (!__get_cpu_var(save_modifying_code))
                return;

        /* Finish all executions before clearing nmi_running */
        smp_mb();
        atomic_dec(&nmi_running);
}

/* Set the write flag, but only once no NMI is currently running */
static void wait_for_nmi_and_set_mod_flag(void)
{
        if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
                return;

        do {
                cpu_relax();
        } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

        nmi_wait_count++;
}

static void wait_for_nmi(void)
{
        if (!atomic_read(&nmi_running))
                return;

        do {
                cpu_relax();
        } while (atomic_read(&nmi_running));

        nmi_wait_count++;
}
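
/*
 * An illustrative note on the nmi_running encoding used above (no
 * additional code, just an example of the values involved): bit 31 is
 * MOD_CODE_WRITE_FLAG and the low bits count NMIs currently between
 * ftrace_nmi_enter() and ftrace_nmi_exit(). With the write flag set
 * and two NMIs in flight the word reads 0x80000002.
 * wait_for_nmi_and_set_mod_flag() can only install the flag when the
 * whole word is 0 (no NMI running, no write pending), and
 * clear_mod_flag() strips bit 31 while leaving the NMI count intact.
 */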

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
        /*
         * On x86_64, kernel text mappings are mapped read-only with
         * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping
         * instead of the kernel text mapping to modify the kernel text.
         *
         * For 32-bit kernels, these mappings are the same, and we can
         * use the kernel identity mapping to modify code.
         */
        if (within(ip, (unsigned long)_text, (unsigned long)_etext))
                ip = (unsigned long)__va(__pa(ip));

        mod_code_ip = (void *)ip;
        mod_code_newcode = new_code;

        /* The buffers need to be visible before we let NMIs write them */
        smp_mb();

        wait_for_nmi_and_set_mod_flag();

        /* Make sure all running NMIs have finished before we write the code */
        smp_mb();

        ftrace_mod_code();

        /* Make sure the write happens before clearing the bit */
        smp_mb();

        clear_mod_flag();
        wait_for_nmi();

        return mod_code_status;
}

static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

static unsigned char *ftrace_nop_replace(void)
{
        return ftrace_nop;
}

static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                   unsigned char *new_code)
{
        unsigned char replaced[MCOUNT_INSN_SIZE];

        /*
         * Note: Due to modules and __init, code can disappear and
         * change; we need to protect against faulting as well as code
         * changing. We do this by using the probe_kernel_* functions.
         *
         * No real locking needed, this code is run through
         * kstop_machine, or before SMP starts.
         */

        /* read the text we want to modify */
        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* Make sure it is what we expect it to be */
        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
                return -EINVAL;

        /* replace the text with the new text */
        if (do_ftrace_mod_code(ip, new_code))
                return -EPERM;

        sync_core();

        return 0;
}

int ftrace_make_nop(struct module *mod,
                    struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_call_replace(ip, addr);
        new = ftrace_nop_replace();

        return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_nop_replace();
        new = ftrace_call_replace(ip, addr);

        return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
        unsigned long ip = (unsigned long)(&ftrace_call);
        unsigned char old[MCOUNT_INSN_SIZE], *new;
        int ret;

        memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = ftrace_modify_code(ip, old, new);

        return ret;
}
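
/*
 * For illustration, the effect of the two conversions above on a
 * single mcount call site (the call target is whatever addr the core
 * ftrace code passes in, typically the ftrace trampoline; the nop
 * shown is the P6 variant selected in ftrace_dyn_arch_init() below):
 *
 *      tracing on:   e8 xx xx xx xx    call <addr> (rel32)
 *      tracing off:  0f 1f 44 00 00    5-byte nop
 *
 * Both paths go through ftrace_modify_code(), which refuses to write
 * unless the bytes currently at rec->ip match the expected old pattern.
 */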

int __init ftrace_dyn_arch_init(void *data)
{
        extern const unsigned char ftrace_test_p6nop[];
        extern const unsigned char ftrace_test_nop5[];
        extern const unsigned char ftrace_test_jmp[];
        int faulted = 0;

        /*
         * There is no good nop for all x86 archs.
         * We will default to using the P6_NOP5, but first we
         * will test to make sure that the nop will actually
         * work on this CPU. If it faults, we will then
         * fall back to a less efficient 5-byte nop. If that fails
         * we then just use a jmp as our nop. This isn't the most
         * efficient nop, but we cannot use a multi-part nop
         * since we would then risk being preempted in the middle
         * of that nop, and if we then enabled tracing, it might
         * cause a system crash.
         *
         * TODO: check the cpuid to determine the best nop.
         */
        asm volatile (
                "ftrace_test_jmp:"
                "jmp ftrace_test_p6nop\n"
                "nop\n"
                "nop\n"
                "nop\n"  /* 2 byte jmp + 3 bytes */
                "ftrace_test_p6nop:"
                P6_NOP5
                "jmp 1f\n"
                "ftrace_test_nop5:"
                ".byte 0x66,0x66,0x66,0x66,0x90\n"
                "1:"
                ".section .fixup, \"ax\"\n"
                "2:     movl $1, %0\n"
                "       jmp ftrace_test_nop5\n"
                "3:     movl $2, %0\n"
                "       jmp 1b\n"
                ".previous\n"
                _ASM_EXTABLE(ftrace_test_p6nop, 2b)
                _ASM_EXTABLE(ftrace_test_nop5, 3b)
                : "=r"(faulted) : "0" (faulted));

        switch (faulted) {
        case 0:
                pr_info("converting mcount calls to 0f 1f 44 00 00\n");
                memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
                break;
        case 1:
                pr_info("converting mcount calls to 66 66 66 66 90\n");
                memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
                break;
        case 2:
                pr_info("converting mcount calls to jmp . + 5\n");
                memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
                break;
        }

        /* The return code is returned via data */
        *(unsigned long *)data = 0;

        return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
                          int old_offset, int new_offset)
{
        unsigned char code[MCOUNT_INSN_SIZE];

        if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
                return -EINVAL;

        *(int *)(&code[1]) = new_offset;

        if (do_ftrace_mod_code(ip, &code))
                return -EPERM;

        return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}
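
/*
 * A note on the jmp patching above: ftrace_graph_call is a 5-byte
 * "e9 <rel32>" jump, and enabling or disabling the graph caller only
 * rewrites the rel32 operand so the jump lands on ftrace_graph_caller
 * or ftrace_stub respectively. For example (offsets are made up), if
 * the stub sits 0x20 bytes past the end of the jump and the graph
 * caller 0x80 bytes past it, enabling rewrites e9 20 00 00 00 into
 * e9 80 00 00 00. ftrace_mod_jmp() checks both the 0xe9 opcode and
 * the expected old offset first, so a mismatch fails with -EINVAL
 * instead of silently corrupting the site.
 */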

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
                           unsigned long frame_pointer)
{
        unsigned long old;
        int faulted;
        struct ftrace_graph_ent trace;
        unsigned long return_hooker = (unsigned long)
                                &return_to_handler;

        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                return;

        /*
         * Protect against a fault, even if it shouldn't
         * happen. This tool is too intrusive to go
         * without such protection.
         */
        asm volatile(
                "1: " _ASM_MOV " (%[parent]), %[old]\n"
                "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
                "   movl $0, %[faulted]\n"
                "3:\n"

                ".section .fixup, \"ax\"\n"
                "4: movl $1, %[faulted]\n"
                "   jmp 3b\n"
                ".previous\n"

                _ASM_EXTABLE(1b, 4b)
                _ASM_EXTABLE(2b, 4b)

                : [old] "=&r" (old), [faulted] "=r" (faulted)
                : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
                : "memory"
        );

        if (unlikely(faulted)) {
                ftrace_graph_stop();
                WARN_ON(1);
                return;
        }

        if (ftrace_push_return_trace(old, self_addr, &trace.depth,
                                     frame_pointer) == -EBUSY) {
                *parent = old;
                return;
        }

        trace.func = self_addr;

        /* Only trace if the calling function expects to */
        if (!ftrace_graph_entry(&trace)) {
                current->curr_ret_stack--;
                *parent = old;
        }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
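
/*
 * A schematic of the return hooking done by prepare_ftrace_return()
 * (the layout below is only an illustration of the code above):
 *
 *      before:  *parent == real return address in the caller
 *      after:   *parent == return_to_handler, with the real return
 *               address saved via ftrace_push_return_trace()
 *
 * When the traced function returns, it therefore lands in
 * return_to_handler, which calls the graph exit handler and then
 * jumps back to the saved return address.
 */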