xref: /freebsd/sys/cddl/dev/kinst/amd64/kinst_isa.c (revision 6fe0a6c80a1aff14236924eb33e4013aa8c14f91)
/*
 * SPDX-License-Identifier: CDDL 1.0
 *
 * Copyright 2022 Christos Margiolis <christos@FreeBSD.org>
 * Copyright 2022 Mark Johnston <markj@FreeBSD.org>
 */

#include <sys/param.h>

#include <machine/cpufunc.h>
#include <machine/md_var.h>

#include <sys/dtrace.h>
#include <cddl/dev/dtrace/dtrace_cddl.h>
#include <dis_tables.h>

#include "kinst.h"

#define KINST_PUSHL_RBP		0x55
#define KINST_STI		0xfb
#define KINST_POPF		0x9d

#define KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
#define KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
#define KINST_MODRM_RM(b)	((b) & 0x07)

#define KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
#define KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
#define KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)

#define KINST_REX_W(r)		(((r) & 0x08) >> 3)
#define KINST_REX_R(r)		(((r) & 0x04) >> 2)
#define KINST_REX_X(r)		(((r) & 0x02) >> 1)
#define KINST_REX_B(r)		(((r) & 0x01) >> 0)

#define KINST_F_CALL		0x0001	/* instruction is a "call" */
#define KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
#define KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
#define KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
#define KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */

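/*
 * A few illustrative combinations (based on the decoding performed in
 * kinst_instr_dissect() below; not exhaustive):
 *   call <rel32>			KINST_F_CALL | KINST_F_DIRECT_CALL
 *   call *8(%rax)			KINST_F_CALL
 *   call *%rax				KINST_F_CALL | KINST_F_MOD_DIRECT
 *   jmp <rel32>, jcc <rel8>		KINST_F_JMP | KINST_F_RIPREL
 *   mov 0x10(%rip), %rax		KINST_F_RIPREL
 */
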
/*
 * Map ModR/M register bits to a trapframe offset.
 */
static int
kinst_regoff(int reg)
{
#define	_MATCH_REG(i, reg)			\
	case i:					\
		return (offsetof(struct trapframe, tf_ ## reg) / \
		    sizeof(register_t))
	switch (reg) {
	_MATCH_REG( 0, rax);
	_MATCH_REG( 1, rcx);
	_MATCH_REG( 2, rdx);
	_MATCH_REG( 3, rbx);
	_MATCH_REG( 4, rsp); /* SIB when mod != 3 */
	_MATCH_REG( 5, rbp);
	_MATCH_REG( 6, rsi);
	_MATCH_REG( 7, rdi);
	_MATCH_REG( 8, r8); /* REX.R is set */
	_MATCH_REG( 9, r9);
	_MATCH_REG(10, r10);
	_MATCH_REG(11, r11);
	_MATCH_REG(12, r12);
	_MATCH_REG(13, r13);
	_MATCH_REG(14, r14);
	_MATCH_REG(15, r15);
	}
#undef _MATCH_REG
	panic("%s: unhandled register index %d", __func__, reg);
}

/*
 * Obtain the specified register's value.
 */
static uint64_t
kinst_regval(struct trapframe *frame, int reg)
{
	if (reg == -1)
		return (0);
	return (((register_t *)frame)[kinst_regoff(reg)]);
}

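/*
 * Compute the 32-bit displacement to encode in the trampoline copy of a
 * %rip-relative instruction placed at "dst".  kp_md.disp holds the
 * displacement found in the original instruction (pre-adjusted by
 * kinst_instr_dissect() when the instruction had to be rewritten), so
 * (patchpoint + disp - dst) makes the copy resolve to the same target as
 * the original instruction did.
 */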
static uint32_t
kinst_riprel_disp(struct kinst_probe *kp, void *dst)
{
	return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
	    (intptr_t)dst));
}

static void
kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
{
	uint8_t *instr;
	uint32_t disp;
	int ilen;

	ilen = kp->kp_md.tinstlen;

	memcpy(tramp, kp->kp_md.template, ilen);
	if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
		disp = kinst_riprel_disp(kp, tramp);
		memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
	}

	/*
	 * The following position-independent jmp takes us back to the
	 * original code.  It is encoded as "jmp *0(%rip)" (six bytes),
	 * followed by the absolute address of the instruction following
	 * the one that was traced (eight bytes).
	 */
	tramp[ilen + 0] = 0xff;
	tramp[ilen + 1] = 0x25;
	tramp[ilen + 2] = 0x00;
	tramp[ilen + 3] = 0x00;
	tramp[ilen + 4] = 0x00;
	tramp[ilen + 5] = 0x00;
	instr = kp->kp_patchpoint + kp->kp_md.instlen;
	memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
}

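/*
 * Breakpoint handler: look up the probe registered for the trapping address,
 * fire it, then arrange for the traced instruction to take effect, either by
 * emulating it in software (calls) or by transferring control to a per-thread
 * trampoline containing a copy of it.
 */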
int
kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
{
	solaris_cpu_t *cpu;
	uintptr_t *stack, retaddr;
	struct kinst_probe *kp;
	struct kinst_probe_md *kpmd;
	uint8_t *tramp;

	stack = (uintptr_t *)frame->tf_rsp;
	cpu = &solaris_cpu[curcpu];

	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
		if ((uintptr_t)kp->kp_patchpoint == addr)
			break;
	}
	if (kp == NULL)
		return (0);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	cpu->cpu_dtrace_caller = stack[0];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
	cpu->cpu_dtrace_caller = 0;

	kpmd = &kp->kp_md;
	if ((kpmd->flags & KINST_F_CALL) != 0) {
		/*
		 * dtrace_invop_start() reserves space on the stack to
		 * store the return address of the call instruction.
		 */
		retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
		*(uintptr_t *)scratch = retaddr;

		if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
			frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
			    kpmd->disp + kpmd->instlen);
		} else {
			register_t rval;

			if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
				/* rip-relative */
				rval = frame->tf_rip - 1 + kpmd->instlen;
			} else {
				/* indirect */
				rval = kinst_regval(frame, kpmd->reg1) +
				    (kinst_regval(frame, kpmd->reg2) <<
				    kpmd->scale);
			}

			if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
				frame->tf_rip = rval + kpmd->disp;
			} else {
				frame->tf_rip =
				    *(uintptr_t *)(rval + kpmd->disp);
			}
		}
		return (DTRACE_INVOP_CALL);
	} else {
		tramp = curthread->t_kinst;
		if (tramp == NULL) {
			/*
			 * A trampoline allocation failed, so this probe is
			 * effectively disabled.  Restore the original
			 * instruction.
			 *
			 * We can't safely print anything here, but the
			 * trampoline allocator should have left a breadcrumb in
			 * the dmesg.
			 */
			kinst_patch_tracepoint(kp, kp->kp_savedval);
			frame->tf_rip = (register_t)kp->kp_patchpoint;
		} else {
			kinst_trampoline_populate(kp, tramp);
			frame->tf_rip = (register_t)tramp;
		}
		return (DTRACE_INVOP_NOP);
	}
}

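/*
 * Overwrite the byte at the patch point (either installing the breakpoint or
 * restoring the saved byte), with interrupts disabled and kernel text write
 * protection temporarily lifted.
 */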
void
kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
{
	register_t reg;
	int oldwp;

	reg = intr_disable();
	oldwp = disable_wp();
	*kp->kp_patchpoint = val;
	restore_wp(oldwp);
	intr_restore(reg);
}

static void
kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
{
	kp->kp_md.disp = (int64_t)(int8_t)byte;
}

static void
kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
{
	int32_t disp32;

	memcpy(&disp32, bytes, sizeof(disp32));
	kp->kp_md.disp = (int64_t)disp32;
}

static int
kinst_dis_get_byte(void *p)
{
	int ret;
	uint8_t **instr = p;

	ret = **instr;
	(*instr)++;

	return (ret);
}

/*
 * Set up all of the state needed to faithfully execute a probed instruction.
 *
 * In the simple case, we copy the instruction unmodified to a per-thread
 * trampoline, wherein it is followed by a jump back to the original code.
 * Some cases need extra handling, however:
 * - Instructions can have %rip as an operand:
 *   - with %rip-relative addressing encoded in ModR/M, or
 *   - implicitly as a part of the instruction definition (jmp, call).
 * - Call instructions (which may be %rip-relative) need to push the correct
 *   return address onto the stack.
 *
 * Call instructions are simple enough to be emulated in software, so we simply
 * do not use the trampoline mechanism in that case.  kinst_invop() will compute
 * the branch target using the address info computed here (register operands and
 * displacement).
 *
 * %rip-relative operands encoded using the ModR/M byte always use a 32-bit
 * displacement; when populating the trampoline the displacement is adjusted to
 * be relative to the trampoline address.  Trampolines are always allocated
 * above KERNBASE for this reason.
 *
 * For other %rip-relative operands (just jumps) we take the same approach.
 * Instructions which specify an 8-bit displacement must be rewritten to use a
 * 32-bit displacement.
 */
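/*
 * For example, for "call *0x10(%rax,%rbx,8)" (ff 54 d8 10) the dissection
 * below records KINST_F_CALL with reg1 = %rax, reg2 = %rbx, scale = 3 and
 * disp = 0x10; kinst_invop() then loads the branch target from
 * *(reg1 + (reg2 << scale) + disp) and stores the return address in the
 * stack slot reserved by dtrace_invop_start().
 */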
static int
kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
{
	struct kinst_probe_md *kpmd;
	dis86_t d86;
	uint8_t *bytes, modrm, rex;
	int dispoff, i, ilen, opcidx;

	kpmd = &kp->kp_md;

	d86.d86_data = instr;
	d86.d86_get_byte = kinst_dis_get_byte;
	d86.d86_check_func = NULL;
	if (dtrace_disx86(&d86, SIZE64) != 0) {
		KINST_LOG("failed to disassemble instruction at: %p", *instr);
		return (EINVAL);
	}
	bytes = d86.d86_bytes;
	kpmd->instlen = kpmd->tinstlen = d86.d86_len;

	/*
	 * Skip over prefixes, save REX.
	 */
	rex = 0;
	for (i = 0; i < kpmd->instlen; i++) {
		switch (bytes[i]) {
		case 0xf0 ... 0xf3:
			/* group 1 */
			continue;
		case 0x26:
		case 0x2e:
		case 0x36:
		case 0x3e:
		case 0x64:
		case 0x65:
			/* group 2 */
			continue;
		case 0x66:
			/* group 3 */
			continue;
		case 0x67:
			/* group 4 */
			continue;
		case 0x40 ... 0x4f:
			/* REX */
			rex = bytes[i];
			continue;
		}
		break;
	}
	KASSERT(i < kpmd->instlen,
	    ("%s: failed to disassemble instruction at %p", __func__, bytes));
	opcidx = i;

	/*
	 * Identify instructions of interest by opcode: calls and jumps.
	 * Extract displacements.
	 */
	dispoff = -1;
	switch (bytes[opcidx]) {
	case 0x0f:
		switch (bytes[opcidx + 1]) {
		case 0x80 ... 0x8f:
			/* conditional jmp near */
			kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
			dispoff = opcidx + 2;
			kinst_set_disp32(kp, &bytes[dispoff]);
			break;
		}
		break;
	case 0xe3:
		/*
		 * There is no straightforward way to translate this
		 * instruction (jrcxz) to use a 32-bit displacement.
		 * Fortunately, it is rarely used.
		 */
		return (EINVAL);
	case 0x70 ... 0x7f:
		/* conditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe9:
		/* unconditional jmp near */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xeb:
		/* unconditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe8:
	case 0x9a:
		/* direct call */
		kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xff:
		KASSERT(d86.d86_got_modrm,
		    ("no ModR/M byte for instr at %p", *instr - kpmd->instlen));
		switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
		case 0x02:
		case 0x03:
			/* indirect call */
			kpmd->flags |= KINST_F_CALL;
			break;
		case 0x04:
		case 0x05:
			/* indirect jump */
			kpmd->flags |= KINST_F_JMP;
			break;
		}
	}

	/*
	 * If there's a ModR/M byte, we need to check it to see if the operand
	 * is %rip-relative, and rewrite the displacement if so.  If not, we
	 * might still have to extract operand info if this is a call
	 * instruction.
	 */
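	/*
	 * For example, "mov 0x1234(%rip), %rax" encodes as 48 8b 05 34 12 00 00;
	 * its ModR/M byte (0x05) has mod == 0 and r/m == 5, which in 64-bit
	 * mode denotes a %rip-relative operand with a 32-bit displacement.
	 */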
	if (d86.d86_got_modrm) {
		uint8_t mod, rm, sib;

		kpmd->reg1 = kpmd->reg2 = -1;

		modrm = bytes[d86.d86_rmindex];
		mod = KINST_MODRM_MOD(modrm);
		rm = KINST_MODRM_RM(modrm);
		if (mod == 0 && rm == 5) {
			kpmd->flags |= KINST_F_RIPREL;
			dispoff = d86.d86_rmindex + 1;
			kinst_set_disp32(kp, &bytes[dispoff]);
		} else if ((kpmd->flags & KINST_F_CALL) != 0) {
			bool havesib;

			havesib = (mod != 3 && rm == 4);
			dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
			if (mod == 1)
				kinst_set_disp8(kp, bytes[dispoff]);
			else if (mod == 2)
				kinst_set_disp32(kp, &bytes[dispoff]);
			else if (mod == 3)
				kpmd->flags |= KINST_F_MOD_DIRECT;

			if (havesib) {
				sib = bytes[d86.d86_rmindex + 1];
				if (KINST_SIB_BASE(sib) != 5) {
					kpmd->reg1 = KINST_SIB_BASE(sib) |
					    (KINST_REX_B(rex) << 3);
				}
				kpmd->scale = KINST_SIB_SCALE(sib);
				kpmd->reg2 = KINST_SIB_INDEX(sib) |
				    (KINST_REX_X(rex) << 3);
			} else {
				kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
			}
		}
	}

	/*
	 * Calls are emulated in software; once operands are decoded we have
	 * nothing else to do.
	 */
	if ((kpmd->flags & KINST_F_CALL) != 0)
		return (0);

	/*
	 * Allocate and populate an instruction trampoline template.
	 *
	 * Position-independent instructions can simply be copied, but
	 * position-dependent instructions require some surgery: jump
	 * instructions with an 8-bit displacement need to be converted to use a
	 * 32-bit displacement, and the adjusted displacement needs to be
	 * computed.
	 */
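	/*
	 * For example, "je 0x10" (74 10) is rewritten below as a near jump
	 * (0f 84 <disp32>): tinstlen grows from 2 to 6 and disp is reduced
	 * by 4, so the target, computed relative to the end of the longer
	 * instruction, stays the same.
	 */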
	ilen = kpmd->instlen;
	if ((kpmd->flags & KINST_F_RIPREL) != 0) {
		if ((kpmd->flags & KINST_F_JMP) == 0 ||
		    bytes[opcidx] == 0x0f ||
		    bytes[opcidx] == 0xe9 ||
		    bytes[opcidx] == 0xff) {
			memcpy(kpmd->template, bytes, dispoff);
			memcpy(&kpmd->template[dispoff + 4],
			    &bytes[dispoff + 4], ilen - (dispoff + 4));
			kpmd->dispoff = dispoff;
		} else if (bytes[opcidx] == 0xeb) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0xe9;
			kpmd->dispoff = opcidx + 1;

			/* Instruction length changes from 2 to 5. */
			kpmd->tinstlen = 5;
			kpmd->disp -= 3;
		} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0x0f;
			kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
			kpmd->dispoff = opcidx + 2;

			/* Instruction length changes from 2 to 6. */
			kpmd->tinstlen = 6;
			kpmd->disp -= 4;
		} else {
			panic("unhandled opcode %#x", bytes[opcidx]);
		}
	} else {
		memcpy(kpmd->template, bytes, ilen);
	}

	return (0);
}

int
kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
    void *opaque)
{
	struct kinst_probe *kp;
	dtrace_kinst_probedesc_t *pd;
	const char *func;
	int error, n, off;
	uint8_t *instr, *limit;

	pd = opaque;
	func = symval->name;
	if (strcmp(func, pd->kpd_func) != 0 || strcmp(func, "trap_check") == 0)
		return (0);

	instr = (uint8_t *)symval->value;
	limit = (uint8_t *)symval->value + symval->size;
	if (instr >= limit)
		return (0);

	/*
	 * Ignore functions not beginning with the usual function prologue.
	 * These might correspond to assembly routines with which we should not
	 * meddle.
	 */
	if (*instr != KINST_PUSHL_RBP)
		return (0);

	n = 0;
	while (instr < limit) {
		off = (int)(instr - (uint8_t *)symval->value);
		if (pd->kpd_off != -1 && off != pd->kpd_off) {
			instr += dtrace_instr_size(instr);
			continue;
		}

		/*
		 * Prevent separate dtrace(1) instances from creating copies of
		 * the same probe.
		 */
		LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
			if (strcmp(kp->kp_func, func) == 0 &&
			    strtol(kp->kp_name, NULL, 10) == off)
				return (0);
		}
		if (++n > KINST_PROBETAB_MAX) {
			KINST_LOG("probe list full: %d entries", n);
			return (ENOMEM);
		}
		kp = malloc(sizeof(struct kinst_probe), M_KINST,
		    M_WAITOK | M_ZERO);
		kp->kp_func = func;
		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
		kp->kp_savedval = *instr;
		kp->kp_patchval = KINST_PATCHVAL;
		kp->kp_patchpoint = instr;

		error = kinst_instr_dissect(kp, &instr);
		if (error != 0)
			return (error);

		kinst_probe_create(kp, lf);
	}

	return (0);
}