xref: /linux/arch/x86/kernel/kprobes/opt.c (revision 3c88c692c28746473791276f8b42d2c989d6cbe6)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Kernel Probes Jump Optimization (Optprobes)
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004
6  * Copyright (C) Hitachi Ltd., 2012
7  */
8 #include <linux/kprobes.h>
9 #include <linux/ptrace.h>
10 #include <linux/string.h>
11 #include <linux/slab.h>
12 #include <linux/hardirq.h>
13 #include <linux/preempt.h>
14 #include <linux/extable.h>
15 #include <linux/kdebug.h>
16 #include <linux/kallsyms.h>
17 #include <linux/ftrace.h>
18 #include <linux/frame.h>
19 
20 #include <asm/text-patching.h>
21 #include <asm/cacheflush.h>
22 #include <asm/desc.h>
23 #include <asm/pgtable.h>
24 #include <linux/uaccess.h>
25 #include <asm/alternative.h>
26 #include <asm/insn.h>
27 #include <asm/debugreg.h>
28 #include <asm/set_memory.h>
29 #include <asm/sections.h>
30 #include <asm/nospec-branch.h>
31 
32 #include "common.h"
33 
34 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
35 {
36 	struct optimized_kprobe *op;
37 	struct kprobe *kp;
38 	long offs;
39 	int i;
40 
41 	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
42 		kp = get_kprobe((void *)addr - i);
43 		/* This function only handles jump-optimized kprobe */
44 		if (kp && kprobe_optimized(kp)) {
45 			op = container_of(kp, struct optimized_kprobe, kp);
46 			/* If op->list is not empty, op is under optimizing */
47 			if (list_empty(&op->list))
48 				goto found;
49 		}
50 	}
51 
52 	return addr;
53 found:
54 	/*
55 	 * If the kprobe can be optimized, original bytes which can be
56 	 * overwritten by jump destination address. In this case, original
57 	 * bytes must be recovered from op->optinsn.copied_insn buffer.
58 	 */
59 	if (probe_kernel_read(buf, (void *)addr,
60 		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
61 		return 0UL;
62 
63 	if (addr == (unsigned long)kp->addr) {
64 		buf[0] = kp->opcode;
65 		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
66 	} else {
67 		offs = addr - (unsigned long)kp->addr - 1;
68 		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
69 	}
70 
71 	return (unsigned long)buf;
72 }
73 
74 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
75 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
76 {
77 #ifdef CONFIG_X86_64
78 	*addr++ = 0x48;
79 	*addr++ = 0xbf;
80 #else
81 	*addr++ = 0xb8;
82 #endif
83 	*(unsigned long *)addr = val;
84 }
85 
/*
 * Template for the out-of-line code of an optimized kprobe.
 *
 * The code is emitted into .rodata and is only used as a source to copy
 * from; the bytes between optprobe_template_entry and optprobe_template_end
 * are copied into a per-probe buffer and then patched:
 *
 *  - optprobe_template_val:  NOP placeholder that is overwritten with the
 *    instruction produced by synthesize_set_arg1(). x86-64 reserves two
 *    ASM_NOP5 and x86-32 one, presumably 5 bytes each, matching the
 *    2 + 8 and 1 + 4 bytes that function writes.
 *  - optprobe_template_call: NOP placeholder that is overwritten with a
 *    relative call to optimized_callback().
 *
 * Before the placeholders the template pushes the stack pointer and the
 * flags, saves the registers with SAVE_REGS_STRING and copies the resulting
 * stack pointer to %rsi / %edx (presumably the register in which the
 * callback receives its struct pt_regs argument). After the placeholders
 * it moves the saved flags word into the next higher stack slot, restores
 * the registers, skips one slot and pops the flags.
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			"	pushq %rsp\n"
			"	pushfq\n"
			SAVE_REGS_STRING
			"	movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* Placeholder for the instruction that loads the 1st argument */
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* Placeholder for the call to the probe callback */
			ASM_NOP5
			/* Move flags to rsp */
			"	movq 18*8(%rsp), %rdx\n"
			"	movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addq $8, %rsp\n"
			"	popfq\n"
#else /* CONFIG_X86_32 */
			"	pushl %esp\n"
			"	pushfl\n"
			SAVE_REGS_STRING
			"	movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			/* Placeholder for the instruction that loads the 1st argument */
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			/* Placeholder for the call to the probe callback */
			ASM_NOP5
			/* Move flags into esp */
			"	movl 14*4(%esp), %edx\n"
			"	movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			"	addl $4, %esp\n"
			"	popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

/*
 * C declaration for the first label of the template, needed so that the
 * symbol can be handed to STACK_FRAME_NON_STANDARD().
 */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
136 
/*
 * Byte offsets of the patchable locations inside the optprobe template,
 * measured from optprobe_template_entry.
 */
#define TMPL_OFFSET(label) \
	((long)(label) - (long)optprobe_template_entry)

/* Placeholder that receives the instruction loading the 1st argument */
#define TMPL_MOVE_IDX	TMPL_OFFSET(optprobe_template_val)
/* Placeholder that receives the call to the probe callback */
#define TMPL_CALL_IDX	TMPL_OFFSET(optprobe_template_call)
/* End of the template; the copied instructions are placed from here on */
#define TMPL_END_IDX	TMPL_OFFSET(optprobe_template_end)

/* Size of one opcode unit, used as the length of the int3 breakpoint */
#define INT3_SIZE sizeof(kprobe_opcode_t)
145 
146 /* Optimized kprobe call back function: called from optinsn */
147 static void
148 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
149 {
150 	/* This is possible if op is under delayed unoptimizing */
151 	if (kprobe_disabled(&op->kp))
152 		return;
153 
154 	preempt_disable();
155 	if (kprobe_running()) {
156 		kprobes_inc_nmissed_count(&op->kp);
157 	} else {
158 		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
159 		/* Save skipped registers */
160 		regs->cs = __KERNEL_CS;
161 #ifdef CONFIG_X86_32
162 		regs->cs |= get_kernel_rpl();
163 		regs->gs = 0;
164 #endif
165 		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
166 		regs->orig_ax = ~0UL;
167 
168 		__this_cpu_write(current_kprobe, &op->kp);
169 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
170 		opt_pre_handler(&op->kp, regs);
171 		__this_cpu_write(current_kprobe, NULL);
172 	}
173 	preempt_enable();
174 }
175 NOKPROBE_SYMBOL(optimized_callback);
176 
177 static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
178 {
179 	struct insn insn;
180 	int len = 0, ret;
181 
182 	while (len < RELATIVEJUMP_SIZE) {
183 		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
184 		if (!ret || !can_boost(&insn, src + len))
185 			return -EINVAL;
186 		len += ret;
187 	}
188 	/* Check whether the address range is reserved */
189 	if (ftrace_text_reserved(src, src + len - 1) ||
190 	    alternatives_text_reserved(src, src + len - 1) ||
191 	    jump_label_text_reserved(src, src + len - 1))
192 		return -EBUSY;
193 
194 	return len;
195 }
196 
197 /* Check whether insn is indirect jump */
198 static int __insn_is_indirect_jump(struct insn *insn)
199 {
200 	return ((insn->opcode.bytes[0] == 0xff &&
201 		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
202 		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
203 }
204 
205 /* Check whether insn jumps into specified address range */
206 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
207 {
208 	unsigned long target = 0;
209 
210 	switch (insn->opcode.bytes[0]) {
211 	case 0xe0:	/* loopne */
212 	case 0xe1:	/* loope */
213 	case 0xe2:	/* loop */
214 	case 0xe3:	/* jcxz */
215 	case 0xe9:	/* near relative jump */
216 	case 0xeb:	/* short relative jump */
217 		break;
218 	case 0x0f:
219 		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
220 			break;
221 		return 0;
222 	default:
223 		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
224 			break;
225 		return 0;
226 	}
227 	target = (unsigned long)insn->next_byte + insn->immediate.value;
228 
229 	return (start <= target && target <= start + len);
230 }
231 
/*
 * Check whether @insn must be treated as an indirect jump.
 *
 * Besides real indirect jumps, with CONFIG_RETPOLINE a relative jump into
 * the indirect thunk area also counts.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int indirect = __insn_is_indirect_jump(insn);

	if (indirect)
		return indirect;

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, a kernel compiled with
	 * an older gcc may use indirect jumps. So this check is added to
	 * the indirect-jump check instead of replacing it.
	 */
	return insn_jump_into_range(insn,
			(unsigned long)__indirect_thunk_start,
			(unsigned long)__indirect_thunk_end -
			(unsigned long)__indirect_thunk_start);
#else
	return indirect;
#endif
}
251 
252 /* Decode whole function to ensure any instructions don't jump into target */
253 static int can_optimize(unsigned long paddr)
254 {
255 	unsigned long addr, size = 0, offset = 0;
256 	struct insn insn;
257 	kprobe_opcode_t buf[MAX_INSN_SIZE];
258 
259 	/* Lookup symbol including addr */
260 	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
261 		return 0;
262 
263 	/*
264 	 * Do not optimize in the entry code due to the unstable
265 	 * stack handling and registers setup.
266 	 */
267 	if (((paddr >= (unsigned long)__entry_text_start) &&
268 	     (paddr <  (unsigned long)__entry_text_end)) ||
269 	    ((paddr >= (unsigned long)__irqentry_text_start) &&
270 	     (paddr <  (unsigned long)__irqentry_text_end)))
271 		return 0;
272 
273 	/* Check there is enough space for a relative jump. */
274 	if (size - offset < RELATIVEJUMP_SIZE)
275 		return 0;
276 
277 	/* Decode instructions */
278 	addr = paddr - offset;
279 	while (addr < paddr - offset + size) { /* Decode until function end */
280 		unsigned long recovered_insn;
281 		if (search_exception_tables(addr))
282 			/*
283 			 * Since some fixup code will jumps into this function,
284 			 * we can't optimize kprobe in this function.
285 			 */
286 			return 0;
287 		recovered_insn = recover_probed_instruction(buf, addr);
288 		if (!recovered_insn)
289 			return 0;
290 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
291 		insn_get_length(&insn);
292 		/* Another subsystem puts a breakpoint */
293 		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
294 			return 0;
295 		/* Recover address */
296 		insn.kaddr = (void *)addr;
297 		insn.next_byte = (void *)(addr + insn.length);
298 		/* Check any instructions don't jump into target */
299 		if (insn_is_indirect_jump(&insn) ||
300 		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
301 					 RELATIVE_ADDR_SIZE))
302 			return 0;
303 		addr += insn.length;
304 	}
305 
306 	return 1;
307 }
308 
309 /* Check optimized_kprobe can actually be optimized. */
310 int arch_check_optimized_kprobe(struct optimized_kprobe *op)
311 {
312 	int i;
313 	struct kprobe *p;
314 
315 	for (i = 1; i < op->optinsn.size; i++) {
316 		p = get_kprobe(op->kp.addr + i);
317 		if (p && !kprobe_disabled(p))
318 			return -EEXIST;
319 	}
320 
321 	return 0;
322 }
323 
324 /* Check the addr is within the optimized instructions. */
325 int arch_within_optimized_kprobe(struct optimized_kprobe *op,
326 				 unsigned long addr)
327 {
328 	return ((unsigned long)op->kp.addr <= addr &&
329 		(unsigned long)op->kp.addr + op->optinsn.size > addr);
330 }
331 
332 /* Free optimized instruction slot */
333 static
334 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
335 {
336 	if (op->optinsn.insn) {
337 		free_optinsn_slot(op->optinsn.insn, dirty);
338 		op->optinsn.insn = NULL;
339 		op->optinsn.size = 0;
340 	}
341 }
342 
/*
 * Release the optimized instruction slot of @op.
 * The slot is freed with the dirty flag set.
 */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}
347 
348 /*
349  * Copy replacing target instructions
350  * Target instructions MUST be relocatable (checked inside)
351  * This is called when new aggr(opt)probe is allocated or reused.
352  */
353 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
354 				  struct kprobe *__unused)
355 {
356 	u8 *buf = NULL, *slot;
357 	int ret, len;
358 	long rel;
359 
360 	if (!can_optimize((unsigned long)op->kp.addr))
361 		return -EILSEQ;
362 
363 	buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
364 	if (!buf)
365 		return -ENOMEM;
366 
367 	op->optinsn.insn = slot = get_optinsn_slot();
368 	if (!slot) {
369 		ret = -ENOMEM;
370 		goto out;
371 	}
372 
373 	/*
374 	 * Verify if the address gap is in 2GB range, because this uses
375 	 * a relative jump.
376 	 */
377 	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
378 	if (abs(rel) > 0x7fffffff) {
379 		ret = -ERANGE;
380 		goto err;
381 	}
382 
383 	/* Copy arch-dep-instance from template */
384 	memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
385 
386 	/* Copy instructions into the out-of-line buffer */
387 	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
388 					  slot + TMPL_END_IDX);
389 	if (ret < 0)
390 		goto err;
391 	op->optinsn.size = ret;
392 	len = TMPL_END_IDX + op->optinsn.size;
393 
394 	/* Set probe information */
395 	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
396 
397 	/* Set probe function call */
398 	synthesize_relcall(buf + TMPL_CALL_IDX,
399 			   slot + TMPL_CALL_IDX, optimized_callback);
400 
401 	/* Set returning jmp instruction at the tail of out-of-line buffer */
402 	synthesize_reljump(buf + len, slot + len,
403 			   (u8 *)op->kp.addr + op->optinsn.size);
404 	len += RELATIVEJUMP_SIZE;
405 
406 	/* We have to use text_poke for instuction buffer because it is RO */
407 	text_poke(slot, buf, len);
408 	ret = 0;
409 out:
410 	kfree(buf);
411 	return ret;
412 
413 err:
414 	__arch_remove_optimized_kprobe(op, 0);
415 	goto out;
416 }
417 
418 /*
419  * Replace breakpoints (int3) with relative jumps.
420  * Caller must call with locking kprobe_mutex and text_mutex.
421  */
422 void arch_optimize_kprobes(struct list_head *oplist)
423 {
424 	struct optimized_kprobe *op, *tmp;
425 	u8 insn_buf[RELATIVEJUMP_SIZE];
426 
427 	list_for_each_entry_safe(op, tmp, oplist, list) {
428 		s32 rel = (s32)((long)op->optinsn.insn -
429 			((long)op->kp.addr + RELATIVEJUMP_SIZE));
430 
431 		WARN_ON(kprobe_disabled(&op->kp));
432 
433 		/* Backup instructions which will be replaced by jump address */
434 		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
435 		       RELATIVE_ADDR_SIZE);
436 
437 		insn_buf[0] = RELATIVEJUMP_OPCODE;
438 		*(s32 *)(&insn_buf[1]) = rel;
439 
440 		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
441 			     op->optinsn.insn);
442 
443 		list_del_init(&op->list);
444 	}
445 }
446 
447 /* Replace a relative jump with a breakpoint (int3).  */
448 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
449 {
450 	u8 insn_buf[RELATIVEJUMP_SIZE];
451 
452 	/* Set int3 to first byte for kprobes */
453 	insn_buf[0] = BREAKPOINT_INSTRUCTION;
454 	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
455 	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
456 		     op->optinsn.insn);
457 }
458 
459 /*
460  * Recover original instructions and breakpoints from relative jumps.
461  * Caller must call with locking kprobe_mutex.
462  */
463 extern void arch_unoptimize_kprobes(struct list_head *oplist,
464 				    struct list_head *done_list)
465 {
466 	struct optimized_kprobe *op, *tmp;
467 
468 	list_for_each_entry_safe(op, tmp, oplist, list) {
469 		arch_unoptimize_kprobe(op);
470 		list_move(&op->list, done_list);
471 	}
472 }
473 
474 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
475 {
476 	struct optimized_kprobe *op;
477 
478 	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
479 		/* This kprobe is really able to run optimized path. */
480 		op = container_of(p, struct optimized_kprobe, kp);
481 		/* Detour through copied instructions */
482 		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
483 		if (!reenter)
484 			reset_current_kprobe();
485 		return 1;
486 	}
487 	return 0;
488 }
489 NOKPROBE_SYMBOL(setup_detour_execution);
490