1 /* 2 * Kernel Probes Jump Optimization (Optprobes) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004 19 * Copyright (C) Hitachi Ltd., 2012 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/ptrace.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/hardirq.h> 26 #include <linux/preempt.h> 27 #include <linux/extable.h> 28 #include <linux/kdebug.h> 29 #include <linux/kallsyms.h> 30 #include <linux/ftrace.h> 31 32 #include <asm/text-patching.h> 33 #include <asm/cacheflush.h> 34 #include <asm/desc.h> 35 #include <asm/pgtable.h> 36 #include <linux/uaccess.h> 37 #include <asm/alternative.h> 38 #include <asm/insn.h> 39 #include <asm/debugreg.h> 40 41 #include "common.h" 42 43 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 44 { 45 struct optimized_kprobe *op; 46 struct kprobe *kp; 47 long offs; 48 int i; 49 50 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 51 kp = get_kprobe((void *)addr - i); 52 /* This function only handles jump-optimized kprobe */ 53 if (kp && kprobe_optimized(kp)) { 54 op = container_of(kp, struct optimized_kprobe, kp); 55 /* If op->list is not empty, op is under optimizing */ 56 if (list_empty(&op->list)) 57 goto found; 58 } 59 } 60 61 return addr; 62 found: 63 /* 64 * If the kprobe can be optimized, original bytes which can be 65 * overwritten by jump destination address. In this case, original 66 * bytes must be recovered from op->optinsn.copied_insn buffer. 67 */ 68 if (probe_kernel_read(buf, (void *)addr, 69 MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 70 return 0UL; 71 72 if (addr == (unsigned long)kp->addr) { 73 buf[0] = kp->opcode; 74 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 75 } else { 76 offs = addr - (unsigned long)kp->addr - 1; 77 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 78 } 79 80 return (unsigned long)buf; 81 } 82 83 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 84 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 85 { 86 #ifdef CONFIG_X86_64 87 *addr++ = 0x48; 88 *addr++ = 0xbf; 89 #else 90 *addr++ = 0xb8; 91 #endif 92 *(unsigned long *)addr = val; 93 } 94 95 asm ( 96 ".global optprobe_template_entry\n" 97 "optprobe_template_entry:\n" 98 #ifdef CONFIG_X86_64 99 /* We don't bother saving the ss register */ 100 " pushq %rsp\n" 101 " pushfq\n" 102 SAVE_REGS_STRING 103 " movq %rsp, %rsi\n" 104 ".global optprobe_template_val\n" 105 "optprobe_template_val:\n" 106 ASM_NOP5 107 ASM_NOP5 108 ".global optprobe_template_call\n" 109 "optprobe_template_call:\n" 110 ASM_NOP5 111 /* Move flags to rsp */ 112 " movq 144(%rsp), %rdx\n" 113 " movq %rdx, 152(%rsp)\n" 114 RESTORE_REGS_STRING 115 /* Skip flags entry */ 116 " addq $8, %rsp\n" 117 " popfq\n" 118 #else /* CONFIG_X86_32 */ 119 " pushf\n" 120 SAVE_REGS_STRING 121 " movl %esp, %edx\n" 122 ".global optprobe_template_val\n" 123 "optprobe_template_val:\n" 124 ASM_NOP5 125 ".global optprobe_template_call\n" 126 "optprobe_template_call:\n" 127 ASM_NOP5 128 RESTORE_REGS_STRING 129 " addl $4, %esp\n" /* skip cs */ 130 " popf\n" 131 #endif 132 ".global optprobe_template_end\n" 133 "optprobe_template_end:\n"); 134 135 #define TMPL_MOVE_IDX \ 136 ((long)&optprobe_template_val - (long)&optprobe_template_entry) 137 #define TMPL_CALL_IDX \ 138 ((long)&optprobe_template_call - (long)&optprobe_template_entry) 139 #define TMPL_END_IDX \ 140 ((long)&optprobe_template_end - (long)&optprobe_template_entry) 141 142 #define INT3_SIZE sizeof(kprobe_opcode_t) 143 144 /* Optimized kprobe call back function: called from optinsn */ 145 static void 146 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 147 { 148 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 149 unsigned long flags; 150 151 /* This is possible if op is under delayed unoptimizing */ 152 if (kprobe_disabled(&op->kp)) 153 return; 154 155 local_irq_save(flags); 156 if (kprobe_running()) { 157 kprobes_inc_nmissed_count(&op->kp); 158 } else { 159 /* Save skipped registers */ 160 #ifdef CONFIG_X86_64 161 regs->cs = __KERNEL_CS; 162 #else 163 regs->cs = __KERNEL_CS | get_kernel_rpl(); 164 regs->gs = 0; 165 #endif 166 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 167 regs->orig_ax = ~0UL; 168 169 __this_cpu_write(current_kprobe, &op->kp); 170 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 171 opt_pre_handler(&op->kp, regs); 172 __this_cpu_write(current_kprobe, NULL); 173 } 174 local_irq_restore(flags); 175 } 176 NOKPROBE_SYMBOL(optimized_callback); 177 178 static int copy_optimized_instructions(u8 *dest, u8 *src) 179 { 180 struct insn insn; 181 int len = 0, ret; 182 183 while (len < RELATIVEJUMP_SIZE) { 184 ret = __copy_instruction(dest + len, src + len, &insn); 185 if (!ret || !can_boost(&insn, src + len)) 186 return -EINVAL; 187 len += ret; 188 } 189 /* Check whether the address range is reserved */ 190 if (ftrace_text_reserved(src, src + len - 1) || 191 alternatives_text_reserved(src, src + len - 1) || 192 jump_label_text_reserved(src, src + len - 1)) 193 return -EBUSY; 194 195 return len; 196 } 197 198 /* Check whether insn is indirect jump */ 199 static int insn_is_indirect_jump(struct insn *insn) 200 { 201 return ((insn->opcode.bytes[0] == 0xff && 202 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 203 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 204 } 205 206 /* Check whether insn jumps into specified address range */ 207 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 208 { 209 unsigned long target = 0; 210 211 switch (insn->opcode.bytes[0]) { 212 case 0xe0: /* loopne */ 213 case 0xe1: /* loope */ 214 case 0xe2: /* loop */ 215 case 0xe3: /* jcxz */ 216 case 0xe9: /* near relative jump */ 217 case 0xeb: /* short relative jump */ 218 break; 219 case 0x0f: 220 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 221 break; 222 return 0; 223 default: 224 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 225 break; 226 return 0; 227 } 228 target = (unsigned long)insn->next_byte + insn->immediate.value; 229 230 return (start <= target && target <= start + len); 231 } 232 233 /* Decode whole function to ensure any instructions don't jump into target */ 234 static int can_optimize(unsigned long paddr) 235 { 236 unsigned long addr, size = 0, offset = 0; 237 struct insn insn; 238 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 240 /* Lookup symbol including addr */ 241 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 242 return 0; 243 244 /* 245 * Do not optimize in the entry code due to the unstable 246 * stack handling. 247 */ 248 if ((paddr >= (unsigned long)__entry_text_start) && 249 (paddr < (unsigned long)__entry_text_end)) 250 return 0; 251 252 /* Check there is enough space for a relative jump. */ 253 if (size - offset < RELATIVEJUMP_SIZE) 254 return 0; 255 256 /* Decode instructions */ 257 addr = paddr - offset; 258 while (addr < paddr - offset + size) { /* Decode until function end */ 259 unsigned long recovered_insn; 260 if (search_exception_tables(addr)) 261 /* 262 * Since some fixup code will jumps into this function, 263 * we can't optimize kprobe in this function. 264 */ 265 return 0; 266 recovered_insn = recover_probed_instruction(buf, addr); 267 if (!recovered_insn) 268 return 0; 269 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 270 insn_get_length(&insn); 271 /* Another subsystem puts a breakpoint */ 272 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 273 return 0; 274 /* Recover address */ 275 insn.kaddr = (void *)addr; 276 insn.next_byte = (void *)(addr + insn.length); 277 /* Check any instructions don't jump into target */ 278 if (insn_is_indirect_jump(&insn) || 279 insn_jump_into_range(&insn, paddr + INT3_SIZE, 280 RELATIVE_ADDR_SIZE)) 281 return 0; 282 addr += insn.length; 283 } 284 285 return 1; 286 } 287 288 /* Check optimized_kprobe can actually be optimized. */ 289 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 290 { 291 int i; 292 struct kprobe *p; 293 294 for (i = 1; i < op->optinsn.size; i++) { 295 p = get_kprobe(op->kp.addr + i); 296 if (p && !kprobe_disabled(p)) 297 return -EEXIST; 298 } 299 300 return 0; 301 } 302 303 /* Check the addr is within the optimized instructions. */ 304 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 305 unsigned long addr) 306 { 307 return ((unsigned long)op->kp.addr <= addr && 308 (unsigned long)op->kp.addr + op->optinsn.size > addr); 309 } 310 311 /* Free optimized instruction slot */ 312 static 313 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 314 { 315 if (op->optinsn.insn) { 316 free_optinsn_slot(op->optinsn.insn, dirty); 317 op->optinsn.insn = NULL; 318 op->optinsn.size = 0; 319 } 320 } 321 322 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 323 { 324 __arch_remove_optimized_kprobe(op, 1); 325 } 326 327 /* 328 * Copy replacing target instructions 329 * Target instructions MUST be relocatable (checked inside) 330 * This is called when new aggr(opt)probe is allocated or reused. 331 */ 332 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 333 struct kprobe *__unused) 334 { 335 u8 *buf; 336 int ret; 337 long rel; 338 339 if (!can_optimize((unsigned long)op->kp.addr)) 340 return -EILSEQ; 341 342 op->optinsn.insn = get_optinsn_slot(); 343 if (!op->optinsn.insn) 344 return -ENOMEM; 345 346 /* 347 * Verify if the address gap is in 2GB range, because this uses 348 * a relative jump. 349 */ 350 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; 351 if (abs(rel) > 0x7fffffff) { 352 __arch_remove_optimized_kprobe(op, 0); 353 return -ERANGE; 354 } 355 356 buf = (u8 *)op->optinsn.insn; 357 set_memory_rw((unsigned long)buf & PAGE_MASK, 1); 358 359 /* Copy instructions into the out-of-line buffer */ 360 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); 361 if (ret < 0) { 362 __arch_remove_optimized_kprobe(op, 0); 363 return ret; 364 } 365 op->optinsn.size = ret; 366 367 /* Copy arch-dep-instance from template */ 368 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); 369 370 /* Set probe information */ 371 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 372 373 /* Set probe function call */ 374 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); 375 376 /* Set returning jmp instruction at the tail of out-of-line buffer */ 377 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, 378 (u8 *)op->kp.addr + op->optinsn.size); 379 380 set_memory_ro((unsigned long)buf & PAGE_MASK, 1); 381 382 flush_icache_range((unsigned long) buf, 383 (unsigned long) buf + TMPL_END_IDX + 384 op->optinsn.size + RELATIVEJUMP_SIZE); 385 return 0; 386 } 387 388 /* 389 * Replace breakpoints (int3) with relative jumps. 390 * Caller must call with locking kprobe_mutex and text_mutex. 391 */ 392 void arch_optimize_kprobes(struct list_head *oplist) 393 { 394 struct optimized_kprobe *op, *tmp; 395 u8 insn_buf[RELATIVEJUMP_SIZE]; 396 397 list_for_each_entry_safe(op, tmp, oplist, list) { 398 s32 rel = (s32)((long)op->optinsn.insn - 399 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 400 401 WARN_ON(kprobe_disabled(&op->kp)); 402 403 /* Backup instructions which will be replaced by jump address */ 404 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 405 RELATIVE_ADDR_SIZE); 406 407 insn_buf[0] = RELATIVEJUMP_OPCODE; 408 *(s32 *)(&insn_buf[1]) = rel; 409 410 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 411 op->optinsn.insn); 412 413 list_del_init(&op->list); 414 } 415 } 416 417 /* Replace a relative jump with a breakpoint (int3). */ 418 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 419 { 420 u8 insn_buf[RELATIVEJUMP_SIZE]; 421 422 /* Set int3 to first byte for kprobes */ 423 insn_buf[0] = BREAKPOINT_INSTRUCTION; 424 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 425 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 426 op->optinsn.insn); 427 } 428 429 /* 430 * Recover original instructions and breakpoints from relative jumps. 431 * Caller must call with locking kprobe_mutex. 432 */ 433 extern void arch_unoptimize_kprobes(struct list_head *oplist, 434 struct list_head *done_list) 435 { 436 struct optimized_kprobe *op, *tmp; 437 438 list_for_each_entry_safe(op, tmp, oplist, list) { 439 arch_unoptimize_kprobe(op); 440 list_move(&op->list, done_list); 441 } 442 } 443 444 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 445 { 446 struct optimized_kprobe *op; 447 448 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 449 /* This kprobe is really able to run optimized path. */ 450 op = container_of(p, struct optimized_kprobe, kp); 451 /* Detour through copied instructions */ 452 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 453 if (!reenter) 454 reset_current_kprobe(); 455 preempt_enable_no_resched(); 456 return 1; 457 } 458 return 0; 459 } 460 NOKPROBE_SYMBOL(setup_detour_execution); 461