1 /* 2 * Kernel Probes Jump Optimization (Optprobes) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004 19 * Copyright (C) Hitachi Ltd., 2012 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/ptrace.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/hardirq.h> 26 #include <linux/preempt.h> 27 #include <linux/module.h> 28 #include <linux/kdebug.h> 29 #include <linux/kallsyms.h> 30 #include <linux/ftrace.h> 31 32 #include <asm/cacheflush.h> 33 #include <asm/desc.h> 34 #include <asm/pgtable.h> 35 #include <asm/uaccess.h> 36 #include <asm/alternative.h> 37 #include <asm/insn.h> 38 #include <asm/debugreg.h> 39 40 #include "common.h" 41 42 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 43 { 44 struct optimized_kprobe *op; 45 struct kprobe *kp; 46 long offs; 47 int i; 48 49 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 50 kp = get_kprobe((void *)addr - i); 51 /* This function only handles jump-optimized kprobe */ 52 if (kp && kprobe_optimized(kp)) { 53 op = container_of(kp, struct optimized_kprobe, kp); 54 /* If op->list is not empty, op is under optimizing */ 55 if (list_empty(&op->list)) 56 goto found; 57 } 58 } 59 60 return addr; 61 found: 62 /* 63 * If the kprobe can be optimized, original bytes which can be 64 * overwritten by jump destination address. In this case, original 65 * bytes must be recovered from op->optinsn.copied_insn buffer. 66 */ 67 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 68 if (addr == (unsigned long)kp->addr) { 69 buf[0] = kp->opcode; 70 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 71 } else { 72 offs = addr - (unsigned long)kp->addr - 1; 73 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 74 } 75 76 return (unsigned long)buf; 77 } 78 79 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 80 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 81 { 82 #ifdef CONFIG_X86_64 83 *addr++ = 0x48; 84 *addr++ = 0xbf; 85 #else 86 *addr++ = 0xb8; 87 #endif 88 *(unsigned long *)addr = val; 89 } 90 91 asm ( 92 ".global optprobe_template_entry\n" 93 "optprobe_template_entry:\n" 94 #ifdef CONFIG_X86_64 95 /* We don't bother saving the ss register */ 96 " pushq %rsp\n" 97 " pushfq\n" 98 SAVE_REGS_STRING 99 " movq %rsp, %rsi\n" 100 ".global optprobe_template_val\n" 101 "optprobe_template_val:\n" 102 ASM_NOP5 103 ASM_NOP5 104 ".global optprobe_template_call\n" 105 "optprobe_template_call:\n" 106 ASM_NOP5 107 /* Move flags to rsp */ 108 " movq 144(%rsp), %rdx\n" 109 " movq %rdx, 152(%rsp)\n" 110 RESTORE_REGS_STRING 111 /* Skip flags entry */ 112 " addq $8, %rsp\n" 113 " popfq\n" 114 #else /* CONFIG_X86_32 */ 115 " pushf\n" 116 SAVE_REGS_STRING 117 " movl %esp, %edx\n" 118 ".global optprobe_template_val\n" 119 "optprobe_template_val:\n" 120 ASM_NOP5 121 ".global optprobe_template_call\n" 122 "optprobe_template_call:\n" 123 ASM_NOP5 124 RESTORE_REGS_STRING 125 " addl $4, %esp\n" /* skip cs */ 126 " popf\n" 127 #endif 128 ".global optprobe_template_end\n" 129 "optprobe_template_end:\n"); 130 131 #define TMPL_MOVE_IDX \ 132 ((long)&optprobe_template_val - (long)&optprobe_template_entry) 133 #define TMPL_CALL_IDX \ 134 ((long)&optprobe_template_call - (long)&optprobe_template_entry) 135 #define TMPL_END_IDX \ 136 ((long)&optprobe_template_end - (long)&optprobe_template_entry) 137 138 #define INT3_SIZE sizeof(kprobe_opcode_t) 139 140 /* Optimized kprobe call back function: called from optinsn */ 141 static void 142 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 143 { 144 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 145 unsigned long flags; 146 147 /* This is possible if op is under delayed unoptimizing */ 148 if (kprobe_disabled(&op->kp)) 149 return; 150 151 local_irq_save(flags); 152 if (kprobe_running()) { 153 kprobes_inc_nmissed_count(&op->kp); 154 } else { 155 /* Save skipped registers */ 156 #ifdef CONFIG_X86_64 157 regs->cs = __KERNEL_CS; 158 #else 159 regs->cs = __KERNEL_CS | get_kernel_rpl(); 160 regs->gs = 0; 161 #endif 162 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 163 regs->orig_ax = ~0UL; 164 165 __this_cpu_write(current_kprobe, &op->kp); 166 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 167 opt_pre_handler(&op->kp, regs); 168 __this_cpu_write(current_kprobe, NULL); 169 } 170 local_irq_restore(flags); 171 } 172 NOKPROBE_SYMBOL(optimized_callback); 173 174 static int copy_optimized_instructions(u8 *dest, u8 *src) 175 { 176 int len = 0, ret; 177 178 while (len < RELATIVEJUMP_SIZE) { 179 ret = __copy_instruction(dest + len, src + len); 180 if (!ret || !can_boost(dest + len)) 181 return -EINVAL; 182 len += ret; 183 } 184 /* Check whether the address range is reserved */ 185 if (ftrace_text_reserved(src, src + len - 1) || 186 alternatives_text_reserved(src, src + len - 1) || 187 jump_label_text_reserved(src, src + len - 1)) 188 return -EBUSY; 189 190 return len; 191 } 192 193 /* Check whether insn is indirect jump */ 194 static int insn_is_indirect_jump(struct insn *insn) 195 { 196 return ((insn->opcode.bytes[0] == 0xff && 197 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 198 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 199 } 200 201 /* Check whether insn jumps into specified address range */ 202 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 203 { 204 unsigned long target = 0; 205 206 switch (insn->opcode.bytes[0]) { 207 case 0xe0: /* loopne */ 208 case 0xe1: /* loope */ 209 case 0xe2: /* loop */ 210 case 0xe3: /* jcxz */ 211 case 0xe9: /* near relative jump */ 212 case 0xeb: /* short relative jump */ 213 break; 214 case 0x0f: 215 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 216 break; 217 return 0; 218 default: 219 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 220 break; 221 return 0; 222 } 223 target = (unsigned long)insn->next_byte + insn->immediate.value; 224 225 return (start <= target && target <= start + len); 226 } 227 228 /* Decode whole function to ensure any instructions don't jump into target */ 229 static int can_optimize(unsigned long paddr) 230 { 231 unsigned long addr, size = 0, offset = 0; 232 struct insn insn; 233 kprobe_opcode_t buf[MAX_INSN_SIZE]; 234 235 /* Lookup symbol including addr */ 236 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 237 return 0; 238 239 /* 240 * Do not optimize in the entry code due to the unstable 241 * stack handling. 242 */ 243 if ((paddr >= (unsigned long)__entry_text_start) && 244 (paddr < (unsigned long)__entry_text_end)) 245 return 0; 246 247 /* Check there is enough space for a relative jump. */ 248 if (size - offset < RELATIVEJUMP_SIZE) 249 return 0; 250 251 /* Decode instructions */ 252 addr = paddr - offset; 253 while (addr < paddr - offset + size) { /* Decode until function end */ 254 unsigned long recovered_insn; 255 if (search_exception_tables(addr)) 256 /* 257 * Since some fixup code will jumps into this function, 258 * we can't optimize kprobe in this function. 259 */ 260 return 0; 261 recovered_insn = recover_probed_instruction(buf, addr); 262 if (!recovered_insn) 263 return 0; 264 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 265 insn_get_length(&insn); 266 /* Another subsystem puts a breakpoint */ 267 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 268 return 0; 269 /* Recover address */ 270 insn.kaddr = (void *)addr; 271 insn.next_byte = (void *)(addr + insn.length); 272 /* Check any instructions don't jump into target */ 273 if (insn_is_indirect_jump(&insn) || 274 insn_jump_into_range(&insn, paddr + INT3_SIZE, 275 RELATIVE_ADDR_SIZE)) 276 return 0; 277 addr += insn.length; 278 } 279 280 return 1; 281 } 282 283 /* Check optimized_kprobe can actually be optimized. */ 284 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 285 { 286 int i; 287 struct kprobe *p; 288 289 for (i = 1; i < op->optinsn.size; i++) { 290 p = get_kprobe(op->kp.addr + i); 291 if (p && !kprobe_disabled(p)) 292 return -EEXIST; 293 } 294 295 return 0; 296 } 297 298 /* Check the addr is within the optimized instructions. */ 299 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 300 unsigned long addr) 301 { 302 return ((unsigned long)op->kp.addr <= addr && 303 (unsigned long)op->kp.addr + op->optinsn.size > addr); 304 } 305 306 /* Free optimized instruction slot */ 307 static 308 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 309 { 310 if (op->optinsn.insn) { 311 free_optinsn_slot(op->optinsn.insn, dirty); 312 op->optinsn.insn = NULL; 313 op->optinsn.size = 0; 314 } 315 } 316 317 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 318 { 319 __arch_remove_optimized_kprobe(op, 1); 320 } 321 322 /* 323 * Copy replacing target instructions 324 * Target instructions MUST be relocatable (checked inside) 325 * This is called when new aggr(opt)probe is allocated or reused. 326 */ 327 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 328 struct kprobe *__unused) 329 { 330 u8 *buf; 331 int ret; 332 long rel; 333 334 if (!can_optimize((unsigned long)op->kp.addr)) 335 return -EILSEQ; 336 337 op->optinsn.insn = get_optinsn_slot(); 338 if (!op->optinsn.insn) 339 return -ENOMEM; 340 341 /* 342 * Verify if the address gap is in 2GB range, because this uses 343 * a relative jump. 344 */ 345 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; 346 if (abs(rel) > 0x7fffffff) { 347 __arch_remove_optimized_kprobe(op, 0); 348 return -ERANGE; 349 } 350 351 buf = (u8 *)op->optinsn.insn; 352 353 /* Copy instructions into the out-of-line buffer */ 354 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); 355 if (ret < 0) { 356 __arch_remove_optimized_kprobe(op, 0); 357 return ret; 358 } 359 op->optinsn.size = ret; 360 361 /* Copy arch-dep-instance from template */ 362 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); 363 364 /* Set probe information */ 365 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 366 367 /* Set probe function call */ 368 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); 369 370 /* Set returning jmp instruction at the tail of out-of-line buffer */ 371 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, 372 (u8 *)op->kp.addr + op->optinsn.size); 373 374 flush_icache_range((unsigned long) buf, 375 (unsigned long) buf + TMPL_END_IDX + 376 op->optinsn.size + RELATIVEJUMP_SIZE); 377 return 0; 378 } 379 380 /* 381 * Replace breakpoints (int3) with relative jumps. 382 * Caller must call with locking kprobe_mutex and text_mutex. 383 */ 384 void arch_optimize_kprobes(struct list_head *oplist) 385 { 386 struct optimized_kprobe *op, *tmp; 387 u8 insn_buf[RELATIVEJUMP_SIZE]; 388 389 list_for_each_entry_safe(op, tmp, oplist, list) { 390 s32 rel = (s32)((long)op->optinsn.insn - 391 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 392 393 WARN_ON(kprobe_disabled(&op->kp)); 394 395 /* Backup instructions which will be replaced by jump address */ 396 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 397 RELATIVE_ADDR_SIZE); 398 399 insn_buf[0] = RELATIVEJUMP_OPCODE; 400 *(s32 *)(&insn_buf[1]) = rel; 401 402 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 403 op->optinsn.insn); 404 405 list_del_init(&op->list); 406 } 407 } 408 409 /* Replace a relative jump with a breakpoint (int3). */ 410 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 411 { 412 u8 insn_buf[RELATIVEJUMP_SIZE]; 413 414 /* Set int3 to first byte for kprobes */ 415 insn_buf[0] = BREAKPOINT_INSTRUCTION; 416 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 417 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 418 op->optinsn.insn); 419 } 420 421 /* 422 * Recover original instructions and breakpoints from relative jumps. 423 * Caller must call with locking kprobe_mutex. 424 */ 425 extern void arch_unoptimize_kprobes(struct list_head *oplist, 426 struct list_head *done_list) 427 { 428 struct optimized_kprobe *op, *tmp; 429 430 list_for_each_entry_safe(op, tmp, oplist, list) { 431 arch_unoptimize_kprobe(op); 432 list_move(&op->list, done_list); 433 } 434 } 435 436 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 437 { 438 struct optimized_kprobe *op; 439 440 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 441 /* This kprobe is really able to run optimized path. */ 442 op = container_of(p, struct optimized_kprobe, kp); 443 /* Detour through copied instructions */ 444 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 445 if (!reenter) 446 reset_current_kprobe(); 447 preempt_enable_no_resched(); 448 return 1; 449 } 450 return 0; 451 } 452 NOKPROBE_SYMBOL(setup_detour_execution); 453