1 /* 2 * Kernel Probes Jump Optimization (Optprobes) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004 19 * Copyright (C) Hitachi Ltd., 2012 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/ptrace.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/hardirq.h> 26 #include <linux/preempt.h> 27 #include <linux/extable.h> 28 #include <linux/kdebug.h> 29 #include <linux/kallsyms.h> 30 #include <linux/ftrace.h> 31 32 #include <asm/text-patching.h> 33 #include <asm/cacheflush.h> 34 #include <asm/desc.h> 35 #include <asm/pgtable.h> 36 #include <linux/uaccess.h> 37 #include <asm/alternative.h> 38 #include <asm/insn.h> 39 #include <asm/debugreg.h> 40 41 #include "common.h" 42 43 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 44 { 45 struct optimized_kprobe *op; 46 struct kprobe *kp; 47 long offs; 48 int i; 49 50 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 51 kp = get_kprobe((void *)addr - i); 52 /* This function only handles jump-optimized kprobe */ 53 if (kp && kprobe_optimized(kp)) { 54 op = container_of(kp, struct optimized_kprobe, kp); 55 /* If op->list is not empty, op is under optimizing */ 56 if (list_empty(&op->list)) 57 goto found; 58 } 59 } 60 61 return addr; 62 found: 63 /* 64 * If the kprobe can be optimized, original bytes which can be 65 * overwritten by jump destination address. In this case, original 66 * bytes must be recovered from op->optinsn.copied_insn buffer. 67 */ 68 memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); 69 if (addr == (unsigned long)kp->addr) { 70 buf[0] = kp->opcode; 71 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 72 } else { 73 offs = addr - (unsigned long)kp->addr - 1; 74 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 75 } 76 77 return (unsigned long)buf; 78 } 79 80 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 81 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 82 { 83 #ifdef CONFIG_X86_64 84 *addr++ = 0x48; 85 *addr++ = 0xbf; 86 #else 87 *addr++ = 0xb8; 88 #endif 89 *(unsigned long *)addr = val; 90 } 91 92 asm ( 93 ".global optprobe_template_entry\n" 94 "optprobe_template_entry:\n" 95 #ifdef CONFIG_X86_64 96 /* We don't bother saving the ss register */ 97 " pushq %rsp\n" 98 " pushfq\n" 99 SAVE_REGS_STRING 100 " movq %rsp, %rsi\n" 101 ".global optprobe_template_val\n" 102 "optprobe_template_val:\n" 103 ASM_NOP5 104 ASM_NOP5 105 ".global optprobe_template_call\n" 106 "optprobe_template_call:\n" 107 ASM_NOP5 108 /* Move flags to rsp */ 109 " movq 144(%rsp), %rdx\n" 110 " movq %rdx, 152(%rsp)\n" 111 RESTORE_REGS_STRING 112 /* Skip flags entry */ 113 " addq $8, %rsp\n" 114 " popfq\n" 115 #else /* CONFIG_X86_32 */ 116 " pushf\n" 117 SAVE_REGS_STRING 118 " movl %esp, %edx\n" 119 ".global optprobe_template_val\n" 120 "optprobe_template_val:\n" 121 ASM_NOP5 122 ".global optprobe_template_call\n" 123 "optprobe_template_call:\n" 124 ASM_NOP5 125 RESTORE_REGS_STRING 126 " addl $4, %esp\n" /* skip cs */ 127 " popf\n" 128 #endif 129 ".global optprobe_template_end\n" 130 "optprobe_template_end:\n"); 131 132 #define TMPL_MOVE_IDX \ 133 ((long)&optprobe_template_val - (long)&optprobe_template_entry) 134 #define TMPL_CALL_IDX \ 135 ((long)&optprobe_template_call - (long)&optprobe_template_entry) 136 #define TMPL_END_IDX \ 137 ((long)&optprobe_template_end - (long)&optprobe_template_entry) 138 139 #define INT3_SIZE sizeof(kprobe_opcode_t) 140 141 /* Optimized kprobe call back function: called from optinsn */ 142 static void 143 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 144 { 145 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 146 unsigned long flags; 147 148 /* This is possible if op is under delayed unoptimizing */ 149 if (kprobe_disabled(&op->kp)) 150 return; 151 152 local_irq_save(flags); 153 if (kprobe_running()) { 154 kprobes_inc_nmissed_count(&op->kp); 155 } else { 156 /* Save skipped registers */ 157 #ifdef CONFIG_X86_64 158 regs->cs = __KERNEL_CS; 159 #else 160 regs->cs = __KERNEL_CS | get_kernel_rpl(); 161 regs->gs = 0; 162 #endif 163 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 164 regs->orig_ax = ~0UL; 165 166 __this_cpu_write(current_kprobe, &op->kp); 167 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 168 opt_pre_handler(&op->kp, regs); 169 __this_cpu_write(current_kprobe, NULL); 170 } 171 local_irq_restore(flags); 172 } 173 NOKPROBE_SYMBOL(optimized_callback); 174 175 static int copy_optimized_instructions(u8 *dest, u8 *src) 176 { 177 int len = 0, ret; 178 179 while (len < RELATIVEJUMP_SIZE) { 180 ret = __copy_instruction(dest + len, src + len); 181 if (!ret || !can_boost(dest + len)) 182 return -EINVAL; 183 len += ret; 184 } 185 /* Check whether the address range is reserved */ 186 if (ftrace_text_reserved(src, src + len - 1) || 187 alternatives_text_reserved(src, src + len - 1) || 188 jump_label_text_reserved(src, src + len - 1)) 189 return -EBUSY; 190 191 return len; 192 } 193 194 /* Check whether insn is indirect jump */ 195 static int insn_is_indirect_jump(struct insn *insn) 196 { 197 return ((insn->opcode.bytes[0] == 0xff && 198 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 199 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 200 } 201 202 /* Check whether insn jumps into specified address range */ 203 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 204 { 205 unsigned long target = 0; 206 207 switch (insn->opcode.bytes[0]) { 208 case 0xe0: /* loopne */ 209 case 0xe1: /* loope */ 210 case 0xe2: /* loop */ 211 case 0xe3: /* jcxz */ 212 case 0xe9: /* near relative jump */ 213 case 0xeb: /* short relative jump */ 214 break; 215 case 0x0f: 216 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 217 break; 218 return 0; 219 default: 220 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 221 break; 222 return 0; 223 } 224 target = (unsigned long)insn->next_byte + insn->immediate.value; 225 226 return (start <= target && target <= start + len); 227 } 228 229 /* Decode whole function to ensure any instructions don't jump into target */ 230 static int can_optimize(unsigned long paddr) 231 { 232 unsigned long addr, size = 0, offset = 0; 233 struct insn insn; 234 kprobe_opcode_t buf[MAX_INSN_SIZE]; 235 236 /* Lookup symbol including addr */ 237 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 238 return 0; 239 240 /* 241 * Do not optimize in the entry code due to the unstable 242 * stack handling. 243 */ 244 if ((paddr >= (unsigned long)__entry_text_start) && 245 (paddr < (unsigned long)__entry_text_end)) 246 return 0; 247 248 /* Check there is enough space for a relative jump. */ 249 if (size - offset < RELATIVEJUMP_SIZE) 250 return 0; 251 252 /* Decode instructions */ 253 addr = paddr - offset; 254 while (addr < paddr - offset + size) { /* Decode until function end */ 255 unsigned long recovered_insn; 256 if (search_exception_tables(addr)) 257 /* 258 * Since some fixup code will jumps into this function, 259 * we can't optimize kprobe in this function. 260 */ 261 return 0; 262 recovered_insn = recover_probed_instruction(buf, addr); 263 if (!recovered_insn) 264 return 0; 265 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 266 insn_get_length(&insn); 267 /* Another subsystem puts a breakpoint */ 268 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 269 return 0; 270 /* Recover address */ 271 insn.kaddr = (void *)addr; 272 insn.next_byte = (void *)(addr + insn.length); 273 /* Check any instructions don't jump into target */ 274 if (insn_is_indirect_jump(&insn) || 275 insn_jump_into_range(&insn, paddr + INT3_SIZE, 276 RELATIVE_ADDR_SIZE)) 277 return 0; 278 addr += insn.length; 279 } 280 281 return 1; 282 } 283 284 /* Check optimized_kprobe can actually be optimized. */ 285 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 286 { 287 int i; 288 struct kprobe *p; 289 290 for (i = 1; i < op->optinsn.size; i++) { 291 p = get_kprobe(op->kp.addr + i); 292 if (p && !kprobe_disabled(p)) 293 return -EEXIST; 294 } 295 296 return 0; 297 } 298 299 /* Check the addr is within the optimized instructions. */ 300 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 301 unsigned long addr) 302 { 303 return ((unsigned long)op->kp.addr <= addr && 304 (unsigned long)op->kp.addr + op->optinsn.size > addr); 305 } 306 307 /* Free optimized instruction slot */ 308 static 309 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 310 { 311 if (op->optinsn.insn) { 312 free_optinsn_slot(op->optinsn.insn, dirty); 313 op->optinsn.insn = NULL; 314 op->optinsn.size = 0; 315 } 316 } 317 318 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 319 { 320 __arch_remove_optimized_kprobe(op, 1); 321 } 322 323 /* 324 * Copy replacing target instructions 325 * Target instructions MUST be relocatable (checked inside) 326 * This is called when new aggr(opt)probe is allocated or reused. 327 */ 328 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 329 struct kprobe *__unused) 330 { 331 u8 *buf; 332 int ret; 333 long rel; 334 335 if (!can_optimize((unsigned long)op->kp.addr)) 336 return -EILSEQ; 337 338 op->optinsn.insn = get_optinsn_slot(); 339 if (!op->optinsn.insn) 340 return -ENOMEM; 341 342 /* 343 * Verify if the address gap is in 2GB range, because this uses 344 * a relative jump. 345 */ 346 rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; 347 if (abs(rel) > 0x7fffffff) { 348 __arch_remove_optimized_kprobe(op, 0); 349 return -ERANGE; 350 } 351 352 buf = (u8 *)op->optinsn.insn; 353 354 /* Copy instructions into the out-of-line buffer */ 355 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); 356 if (ret < 0) { 357 __arch_remove_optimized_kprobe(op, 0); 358 return ret; 359 } 360 op->optinsn.size = ret; 361 362 /* Copy arch-dep-instance from template */ 363 memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); 364 365 /* Set probe information */ 366 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 367 368 /* Set probe function call */ 369 synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); 370 371 /* Set returning jmp instruction at the tail of out-of-line buffer */ 372 synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, 373 (u8 *)op->kp.addr + op->optinsn.size); 374 375 flush_icache_range((unsigned long) buf, 376 (unsigned long) buf + TMPL_END_IDX + 377 op->optinsn.size + RELATIVEJUMP_SIZE); 378 return 0; 379 } 380 381 /* 382 * Replace breakpoints (int3) with relative jumps. 383 * Caller must call with locking kprobe_mutex and text_mutex. 384 */ 385 void arch_optimize_kprobes(struct list_head *oplist) 386 { 387 struct optimized_kprobe *op, *tmp; 388 u8 insn_buf[RELATIVEJUMP_SIZE]; 389 390 list_for_each_entry_safe(op, tmp, oplist, list) { 391 s32 rel = (s32)((long)op->optinsn.insn - 392 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 393 394 WARN_ON(kprobe_disabled(&op->kp)); 395 396 /* Backup instructions which will be replaced by jump address */ 397 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 398 RELATIVE_ADDR_SIZE); 399 400 insn_buf[0] = RELATIVEJUMP_OPCODE; 401 *(s32 *)(&insn_buf[1]) = rel; 402 403 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 404 op->optinsn.insn); 405 406 list_del_init(&op->list); 407 } 408 } 409 410 /* Replace a relative jump with a breakpoint (int3). */ 411 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 412 { 413 u8 insn_buf[RELATIVEJUMP_SIZE]; 414 415 /* Set int3 to first byte for kprobes */ 416 insn_buf[0] = BREAKPOINT_INSTRUCTION; 417 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 418 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 419 op->optinsn.insn); 420 } 421 422 /* 423 * Recover original instructions and breakpoints from relative jumps. 424 * Caller must call with locking kprobe_mutex. 425 */ 426 extern void arch_unoptimize_kprobes(struct list_head *oplist, 427 struct list_head *done_list) 428 { 429 struct optimized_kprobe *op, *tmp; 430 431 list_for_each_entry_safe(op, tmp, oplist, list) { 432 arch_unoptimize_kprobe(op); 433 list_move(&op->list, done_list); 434 } 435 } 436 437 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 438 { 439 struct optimized_kprobe *op; 440 441 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 442 /* This kprobe is really able to run optimized path. */ 443 op = container_of(p, struct optimized_kprobe, kp); 444 /* Detour through copied instructions */ 445 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 446 if (!reenter) 447 reset_current_kprobe(); 448 preempt_enable_no_resched(); 449 return 1; 450 } 451 return 0; 452 } 453 NOKPROBE_SYMBOL(setup_detour_execution); 454