/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>


#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * modifying_code is set to notify NMIs that they need to use
 * memory barriers when entering or exiting. But we don't want
 * to burden NMIs with unnecessary memory barriers when code
 * modification is not being done (which is most of the time).
 *
 * A mutex is already held when ftrace_arch_code_modify_prepare
 * and post_process are called. No locks need to be taken here.
 *
 * Stop machine will make sure currently running NMIs are done
 * and new NMIs will see the updated variable before we need
 * to worry about NMIs doing memory barriers.
 */
static int modifying_code __read_mostly;
static DEFINE_PER_CPU(int, save_modifying_code);

int ftrace_arch_code_modify_prepare(void)
{
        set_kernel_text_rw();
        modifying_code = 1;
        return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
        modifying_code = 0;
        set_kernel_text_ro();
        return 0;
}

union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
        struct {
                char e8;
                int offset;
        } __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
        return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
        static union ftrace_code_union calc;

        calc.e8 = 0xe8;
        calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

        /*
         * No locking needed, this must be called via kstop_machine
         * which in essence is like running on a uniprocessor machine.
         */
        return calc.code;
}
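/*
 * Worked example (the addresses are made up; only the encoding rule
 * matters): with MCOUNT_INSN_SIZE == 5, patching the site at
 * ip = 0xffffffff81000100 to call addr = 0xffffffff81234560 gives
 *
 *      offset = addr - (ip + MCOUNT_INSN_SIZE) = 0x0023445b
 *
 * so ftrace_call_replace() returns the five bytes e8 5b 44 23 00:
 * a near call whose rel32 is relative to the next instruction.
 */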
/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done in one atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

#define MOD_CODE_WRITE_FLAG (1 << 31)   /* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;             /* holds return value of text write */
static void *mod_code_ip;               /* holds the IP to write to */
static void *mod_code_newcode;          /* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
        int r;

        r = snprintf(buf, size, "%u %u",
                     nmi_wait_count,
                     atomic_read(&nmi_update_count));
        return r;
}
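/*
 * nmi_running does double duty: the low bits count NMIs currently
 * between ftrace_nmi_enter() and ftrace_nmi_exit(), while bit 31
 * (MOD_CODE_WRITE_FLAG) tells an NMI arriving during a modification
 * to perform the pending write itself from the buffers above.
 * wait_for_nmi_and_set_mod_flag() can only set the flag while the
 * count is zero, and an NMI that enters after the flag is set redoes
 * the write, so neither side can miss the other.
 */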
static void clear_mod_flag(void)
{
        int old = atomic_read(&nmi_running);

        for (;;) {
                int new = old & ~MOD_CODE_WRITE_FLAG;

                if (old == new)
                        break;

                old = atomic_cmpxchg(&nmi_running, old, new);
        }
}

static void ftrace_mod_code(void)
{
        /*
         * Yes, more than one CPU can be writing to mod_code_status
         * (and the code itself). But if one were to fail, then they
         * all should, and if one were to succeed, then they all should.
         */
        mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
                                             MCOUNT_INSN_SIZE);

        /* if we fail, then kill any new writers */
        if (mod_code_status)
                clear_mod_flag();
}

void ftrace_nmi_enter(void)
{
        __get_cpu_var(save_modifying_code) = modifying_code;

        if (!__get_cpu_var(save_modifying_code))
                return;

        if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
                smp_rmb();
                ftrace_mod_code();
                atomic_inc(&nmi_update_count);
        }
        /* Must have previous changes seen before executions */
        smp_mb();
}

void ftrace_nmi_exit(void)
{
        if (!__get_cpu_var(save_modifying_code))
                return;

        /* Finish all executions before clearing nmi_running */
        smp_mb();
        atomic_dec(&nmi_running);
}

static void wait_for_nmi_and_set_mod_flag(void)
{
        if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
                return;

        do {
                cpu_relax();
        } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

        nmi_wait_count++;
}

static void wait_for_nmi(void)
{
        if (!atomic_read(&nmi_running))
                return;

        do {
                cpu_relax();
        } while (atomic_read(&nmi_running));

        nmi_wait_count++;
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
        /*
         * On x86_64, kernel text mappings are mapped read-only with
         * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
         * of the kernel text mapping to modify the kernel text.
         *
         * For 32bit kernels, these mappings are the same and we can use
         * the kernel identity mapping to modify code.
         */
        if (within(ip, (unsigned long)_text, (unsigned long)_etext))
                ip = (unsigned long)__va(__pa(ip));

        mod_code_ip = (void *)ip;
        mod_code_newcode = new_code;

        /* The buffers need to be visible before we let NMIs write them */
        smp_mb();

        wait_for_nmi_and_set_mod_flag();

        /* Make sure all running NMIs have finished before we write the code */
        smp_mb();

        ftrace_mod_code();

        /* Make sure the write happens before clearing the bit */
        smp_mb();

        clear_mod_flag();
        wait_for_nmi();

        return mod_code_status;
}

static unsigned char *ftrace_nop_replace(void)
{
        return ideal_nop5;
}

static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                   unsigned char *new_code)
{
        unsigned char replaced[MCOUNT_INSN_SIZE];

        /*
         * Note: Due to modules and __init, code can disappear and
         * change; we need to protect against faulting as well as code
         * changing. We do this by using the probe_kernel_* functions.
         *
         * No real locking needed, this code is run through
         * kstop_machine, or before SMP starts.
         */

        /* read the text we want to modify */
        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        /* Make sure it is what we expect it to be */
        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
                return -EINVAL;

        /* replace the text with the new text */
        if (do_ftrace_mod_code(ip, new_code))
                return -EPERM;

        sync_core();

        return 0;
}

int ftrace_make_nop(struct module *mod,
                    struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_call_replace(ip, addr);
        new = ftrace_nop_replace();

        return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
        unsigned char *new, *old;
        unsigned long ip = rec->ip;

        old = ftrace_nop_replace();
        new = ftrace_call_replace(ip, addr);

        return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
        unsigned long ip = (unsigned long)(&ftrace_call);
        unsigned char old[MCOUNT_INSN_SIZE], *new;
        int ret;

        memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = ftrace_modify_code(ip, old, new);

        return ret;
}

int __init ftrace_dyn_arch_init(void *data)
{
        /* The return code is returned via data */
        *(unsigned long *)data = 0;

        return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
                          int old_offset, int new_offset)
{
        unsigned char code[MCOUNT_INSN_SIZE];

        if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
                return -EFAULT;

        if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
                return -EINVAL;

        *(int *)(&code[1]) = new_offset;

        if (do_ftrace_mod_code(ip, &code))
                return -EPERM;

        return 0;
}
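/*
 * The code at ftrace_graph_call (declared extern above) is expected to
 * be a 5-byte near jmp: opcode 0xe9 followed by a rel32, which is why
 * ftrace_mod_jmp() checks code[0] against 0xe9 and only rewrites the
 * rel32. The two helpers below retarget that jmp between ftrace_stub
 * (graph tracing off) and ftrace_graph_caller (graph tracing on),
 * computing the offsets the same way as the call offset above:
 * target - (ip + MCOUNT_INSN_SIZE).
 */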
int ftrace_enable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
        unsigned long ip = (unsigned long)(&ftrace_graph_call);
        int old_offset, new_offset;

        old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
        new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

        return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
                           unsigned long frame_pointer)
{
        unsigned long old;
        int faulted;
        struct ftrace_graph_ent trace;
        unsigned long return_hooker = (unsigned long)&return_to_handler;

        if (unlikely(atomic_read(&current->tracing_graph_pause)))
                return;

        /*
         * Protect against a fault, even if it shouldn't
         * happen. This tool is too intrusive to
         * ignore such a protection.
         */
        asm volatile(
                "1: " _ASM_MOV " (%[parent]), %[old]\n"
                "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
                "   movl $0, %[faulted]\n"
                "3:\n"

                ".section .fixup, \"ax\"\n"
                "4: movl $1, %[faulted]\n"
                "   jmp 3b\n"
                ".previous\n"

                _ASM_EXTABLE(1b, 4b)
                _ASM_EXTABLE(2b, 4b)

                : [old] "=&r" (old), [faulted] "=r" (faulted)
                : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
                : "memory"
        );

        if (unlikely(faulted)) {
                ftrace_graph_stop();
                WARN_ON(1);
                return;
        }

        if (ftrace_push_return_trace(old, self_addr, &trace.depth,
                                     frame_pointer) == -EBUSY) {
                *parent = old;
                return;
        }

        trace.func = self_addr;

        /* Only trace if the calling function expects to */
        if (!ftrace_graph_entry(&trace)) {
                current->curr_ret_stack--;
                *parent = old;
        }
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */