/*
 * SPDX-License-Identifier: CDDL 1.0
 *
 * Copyright (c) 2022 Christos Margiolis <christos@FreeBSD.org>
 * Copyright (c) 2022 Mark Johnston <markj@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by Christos Margiolis
 * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 */
11f0bc4ed1SChristos Margiolis
12f0bc4ed1SChristos Margiolis #include <sys/param.h>
1384d7fe4aSMark Johnston #include <sys/pcpu.h>
14f0bc4ed1SChristos Margiolis
15f0bc4ed1SChristos Margiolis #include <machine/cpufunc.h>
16f0bc4ed1SChristos Margiolis #include <machine/md_var.h>
17f0bc4ed1SChristos Margiolis
18f0bc4ed1SChristos Margiolis #include <sys/dtrace.h>
19f0bc4ed1SChristos Margiolis #include <cddl/dev/dtrace/dtrace_cddl.h>
20f0bc4ed1SChristos Margiolis #include <dis_tables.h>
21f0bc4ed1SChristos Margiolis
22f0bc4ed1SChristos Margiolis #include "kinst.h"
23f0bc4ed1SChristos Margiolis
/*
 * Single-byte opcodes that kinst_make_probe() looks for explicitly.
 */
#define	KINST_PUSHL_RBP		0x55
#define	KINST_STI		0xfb
#define	KINST_POPF		0x9d

/*
 * ModR/M byte fields: mod is bits 7-6, reg is bits 5-3, r/m is bits 2-0.
 */
#define	KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
#define	KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
#define	KINST_MODRM_RM(b)	((b) & 0x07)

/*
 * SIB byte fields: scale is bits 7-6, index is bits 5-3, base is bits 2-0.
 */
#define	KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
#define	KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
#define	KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)

/*
 * REX prefix bits; each macro yields 0 or 1.
 */
#define	KINST_REX_W(r)		(((r) & 0x08) >> 3)
#define	KINST_REX_R(r)		(((r) & 0x04) >> 2)
#define	KINST_REX_X(r)		(((r) & 0x02) >> 1)
#define	KINST_REX_B(r)		(((r) & 0x01) >> 0)

/*
 * Per-probe flags stored in the machine-dependent probe state.
 */
#define	KINST_F_CALL		0x0001	/* instruction is a "call" */
#define	KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
#define	KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
#define	KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
#define	KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */
46f0bc4ed1SChristos Margiolis
47f0bc4ed1SChristos Margiolis /*
4884d7fe4aSMark Johnston * Per-CPU trampolines used when the interrupted thread is executing with
4984d7fe4aSMark Johnston * interrupts disabled. If an interrupt is raised while executing a trampoline,
5084d7fe4aSMark Johnston * the interrupt thread cannot safely overwrite its trampoline if it hits a
5184d7fe4aSMark Johnston * kinst probe while executing the interrupt handler.
5284d7fe4aSMark Johnston */
5384d7fe4aSMark Johnston DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);
5484d7fe4aSMark Johnston
5584d7fe4aSMark Johnston /*
56f0bc4ed1SChristos Margiolis * Map ModR/M register bits to a trapframe offset.
57f0bc4ed1SChristos Margiolis */
58f0bc4ed1SChristos Margiolis static int
kinst_regoff(int reg)59f0bc4ed1SChristos Margiolis kinst_regoff(int reg)
60f0bc4ed1SChristos Margiolis {
61f0bc4ed1SChristos Margiolis #define _MATCH_REG(i, reg) \
62f0bc4ed1SChristos Margiolis case i: \
63f0bc4ed1SChristos Margiolis return (offsetof(struct trapframe, tf_ ## reg) / \
64f0bc4ed1SChristos Margiolis sizeof(register_t))
65f0bc4ed1SChristos Margiolis switch (reg) {
66f0bc4ed1SChristos Margiolis _MATCH_REG( 0, rax);
67f0bc4ed1SChristos Margiolis _MATCH_REG( 1, rcx);
68f0bc4ed1SChristos Margiolis _MATCH_REG( 2, rdx);
69f0bc4ed1SChristos Margiolis _MATCH_REG( 3, rbx);
70f0bc4ed1SChristos Margiolis _MATCH_REG( 4, rsp); /* SIB when mod != 3 */
71f0bc4ed1SChristos Margiolis _MATCH_REG( 5, rbp);
72f0bc4ed1SChristos Margiolis _MATCH_REG( 6, rsi);
73f0bc4ed1SChristos Margiolis _MATCH_REG( 7, rdi);
74f0bc4ed1SChristos Margiolis _MATCH_REG( 8, r8); /* REX.R is set */
75f0bc4ed1SChristos Margiolis _MATCH_REG( 9, r9);
76f0bc4ed1SChristos Margiolis _MATCH_REG(10, r10);
77f0bc4ed1SChristos Margiolis _MATCH_REG(11, r11);
78f0bc4ed1SChristos Margiolis _MATCH_REG(12, r12);
79f0bc4ed1SChristos Margiolis _MATCH_REG(13, r13);
80f0bc4ed1SChristos Margiolis _MATCH_REG(14, r14);
81f0bc4ed1SChristos Margiolis _MATCH_REG(15, r15);
82f0bc4ed1SChristos Margiolis }
83f0bc4ed1SChristos Margiolis #undef _MATCH_REG
84f0bc4ed1SChristos Margiolis panic("%s: unhandled register index %d", __func__, reg);
85f0bc4ed1SChristos Margiolis }
86f0bc4ed1SChristos Margiolis
87f0bc4ed1SChristos Margiolis /*
88f0bc4ed1SChristos Margiolis * Obtain the specified register's value.
89f0bc4ed1SChristos Margiolis */
90f0bc4ed1SChristos Margiolis static uint64_t
kinst_regval(struct trapframe * frame,int reg)91f0bc4ed1SChristos Margiolis kinst_regval(struct trapframe *frame, int reg)
92f0bc4ed1SChristos Margiolis {
93f0bc4ed1SChristos Margiolis if (reg == -1)
94f0bc4ed1SChristos Margiolis return (0);
95f0bc4ed1SChristos Margiolis return (((register_t *)frame)[kinst_regoff(reg)]);
96f0bc4ed1SChristos Margiolis }
97f0bc4ed1SChristos Margiolis
98f0bc4ed1SChristos Margiolis static uint32_t
kinst_riprel_disp(struct kinst_probe * kp,void * dst)99f0bc4ed1SChristos Margiolis kinst_riprel_disp(struct kinst_probe *kp, void *dst)
100f0bc4ed1SChristos Margiolis {
101f0bc4ed1SChristos Margiolis return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
102f0bc4ed1SChristos Margiolis (intptr_t)dst));
103f0bc4ed1SChristos Margiolis }
104f0bc4ed1SChristos Margiolis
105f0bc4ed1SChristos Margiolis static void
kinst_trampoline_populate(struct kinst_probe * kp,uint8_t * tramp)106f0bc4ed1SChristos Margiolis kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
107f0bc4ed1SChristos Margiolis {
108f0bc4ed1SChristos Margiolis uint8_t *instr;
109f0bc4ed1SChristos Margiolis uint32_t disp;
110f0bc4ed1SChristos Margiolis int ilen;
111f0bc4ed1SChristos Margiolis
112f0bc4ed1SChristos Margiolis ilen = kp->kp_md.tinstlen;
113f0bc4ed1SChristos Margiolis
1145c134fbaSChristos Margiolis kinst_memcpy(tramp, kp->kp_md.template, ilen);
115f0bc4ed1SChristos Margiolis if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
116f0bc4ed1SChristos Margiolis disp = kinst_riprel_disp(kp, tramp);
1175c134fbaSChristos Margiolis kinst_memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
118f0bc4ed1SChristos Margiolis }
119f0bc4ed1SChristos Margiolis
120f0bc4ed1SChristos Margiolis /*
121f0bc4ed1SChristos Margiolis * The following position-independent jmp takes us back to the
122f0bc4ed1SChristos Margiolis * original code. It is encoded as "jmp *0(%rip)" (six bytes),
123f0bc4ed1SChristos Margiolis * followed by the absolute address of the instruction following
124f0bc4ed1SChristos Margiolis * the one that was traced (eight bytes).
125f0bc4ed1SChristos Margiolis */
126f0bc4ed1SChristos Margiolis tramp[ilen + 0] = 0xff;
127f0bc4ed1SChristos Margiolis tramp[ilen + 1] = 0x25;
128f0bc4ed1SChristos Margiolis tramp[ilen + 2] = 0x00;
129f0bc4ed1SChristos Margiolis tramp[ilen + 3] = 0x00;
130f0bc4ed1SChristos Margiolis tramp[ilen + 4] = 0x00;
131f0bc4ed1SChristos Margiolis tramp[ilen + 5] = 0x00;
132f0bc4ed1SChristos Margiolis instr = kp->kp_patchpoint + kp->kp_md.instlen;
1335c134fbaSChristos Margiolis kinst_memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
134f0bc4ed1SChristos Margiolis }
135f0bc4ed1SChristos Margiolis
136f0bc4ed1SChristos Margiolis int
kinst_invop(uintptr_t addr,struct trapframe * frame,uintptr_t scratch)137f0bc4ed1SChristos Margiolis kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
138f0bc4ed1SChristos Margiolis {
139f0bc4ed1SChristos Margiolis solaris_cpu_t *cpu;
140f0bc4ed1SChristos Margiolis uintptr_t *stack, retaddr;
141f0bc4ed1SChristos Margiolis struct kinst_probe *kp;
142f0bc4ed1SChristos Margiolis struct kinst_probe_md *kpmd;
143f0bc4ed1SChristos Margiolis uint8_t *tramp;
144f0bc4ed1SChristos Margiolis
145f0bc4ed1SChristos Margiolis stack = (uintptr_t *)frame->tf_rsp;
146f0bc4ed1SChristos Margiolis cpu = &solaris_cpu[curcpu];
147f0bc4ed1SChristos Margiolis
148f0bc4ed1SChristos Margiolis LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
149f0bc4ed1SChristos Margiolis if ((uintptr_t)kp->kp_patchpoint == addr)
150f0bc4ed1SChristos Margiolis break;
151f0bc4ed1SChristos Margiolis }
152f0bc4ed1SChristos Margiolis if (kp == NULL)
153f0bc4ed1SChristos Margiolis return (0);
154f0bc4ed1SChristos Margiolis
1550e69c959SMark Johnston /*
1560e69c959SMark Johnston * Report the address of the breakpoint for the benefit of consumers
1570e69c959SMark Johnston * fetching register values with regs[].
1580e69c959SMark Johnston */
1590e69c959SMark Johnston frame->tf_rip--;
1600e69c959SMark Johnston
161f0bc4ed1SChristos Margiolis DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
162f0bc4ed1SChristos Margiolis cpu->cpu_dtrace_caller = stack[0];
163f0bc4ed1SChristos Margiolis DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
164f0bc4ed1SChristos Margiolis dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
165f0bc4ed1SChristos Margiolis cpu->cpu_dtrace_caller = 0;
166f0bc4ed1SChristos Margiolis
167f0bc4ed1SChristos Margiolis kpmd = &kp->kp_md;
168f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_CALL) != 0) {
169f0bc4ed1SChristos Margiolis /*
170f0bc4ed1SChristos Margiolis * dtrace_invop_start() reserves space on the stack to
171f0bc4ed1SChristos Margiolis * store the return address of the call instruction.
172f0bc4ed1SChristos Margiolis */
173f0bc4ed1SChristos Margiolis retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
174f0bc4ed1SChristos Margiolis *(uintptr_t *)scratch = retaddr;
175f0bc4ed1SChristos Margiolis
176f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
177f0bc4ed1SChristos Margiolis frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
178f0bc4ed1SChristos Margiolis kpmd->disp + kpmd->instlen);
179f0bc4ed1SChristos Margiolis } else {
180f0bc4ed1SChristos Margiolis register_t rval;
181f0bc4ed1SChristos Margiolis
182f0bc4ed1SChristos Margiolis if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
183f0bc4ed1SChristos Margiolis /* rip-relative */
1840e69c959SMark Johnston rval = frame->tf_rip + kpmd->instlen;
185f0bc4ed1SChristos Margiolis } else {
186f0bc4ed1SChristos Margiolis /* indirect */
187f0bc4ed1SChristos Margiolis rval = kinst_regval(frame, kpmd->reg1) +
188f0bc4ed1SChristos Margiolis (kinst_regval(frame, kpmd->reg2) <<
189f0bc4ed1SChristos Margiolis kpmd->scale);
190f0bc4ed1SChristos Margiolis }
191f0bc4ed1SChristos Margiolis
192f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
193f0bc4ed1SChristos Margiolis frame->tf_rip = rval + kpmd->disp;
194f0bc4ed1SChristos Margiolis } else {
195f0bc4ed1SChristos Margiolis frame->tf_rip =
196f0bc4ed1SChristos Margiolis *(uintptr_t *)(rval + kpmd->disp);
197f0bc4ed1SChristos Margiolis }
198f0bc4ed1SChristos Margiolis }
199f0bc4ed1SChristos Margiolis return (DTRACE_INVOP_CALL);
200f0bc4ed1SChristos Margiolis } else {
20184d7fe4aSMark Johnston if ((frame->tf_rflags & PSL_I) == 0)
20284d7fe4aSMark Johnston tramp = DPCPU_GET(intr_tramp);
20384d7fe4aSMark Johnston else
2041aa48621SChristos Margiolis tramp = curthread->t_kinst_tramp;
205f0bc4ed1SChristos Margiolis if (tramp == NULL) {
206f0bc4ed1SChristos Margiolis /*
207f0bc4ed1SChristos Margiolis * A trampoline allocation failed, so this probe is
208f0bc4ed1SChristos Margiolis * effectively disabled. Restore the original
209f0bc4ed1SChristos Margiolis * instruction.
210f0bc4ed1SChristos Margiolis *
211f0bc4ed1SChristos Margiolis * We can't safely print anything here, but the
212f0bc4ed1SChristos Margiolis * trampoline allocator should have left a breadcrumb in
213f0bc4ed1SChristos Margiolis * the dmesg.
214f0bc4ed1SChristos Margiolis */
215f0bc4ed1SChristos Margiolis kinst_patch_tracepoint(kp, kp->kp_savedval);
216f0bc4ed1SChristos Margiolis frame->tf_rip = (register_t)kp->kp_patchpoint;
217f0bc4ed1SChristos Margiolis } else {
218f0bc4ed1SChristos Margiolis kinst_trampoline_populate(kp, tramp);
219f0bc4ed1SChristos Margiolis frame->tf_rip = (register_t)tramp;
220f0bc4ed1SChristos Margiolis }
221f0bc4ed1SChristos Margiolis return (DTRACE_INVOP_NOP);
222f0bc4ed1SChristos Margiolis }
223f0bc4ed1SChristos Margiolis }
224f0bc4ed1SChristos Margiolis
225f0bc4ed1SChristos Margiolis void
kinst_patch_tracepoint(struct kinst_probe * kp,kinst_patchval_t val)226f0bc4ed1SChristos Margiolis kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
227f0bc4ed1SChristos Margiolis {
228f0bc4ed1SChristos Margiolis register_t reg;
229f0bc4ed1SChristos Margiolis int oldwp;
230f0bc4ed1SChristos Margiolis
231f0bc4ed1SChristos Margiolis reg = intr_disable();
232f0bc4ed1SChristos Margiolis oldwp = disable_wp();
233f0bc4ed1SChristos Margiolis *kp->kp_patchpoint = val;
234f0bc4ed1SChristos Margiolis restore_wp(oldwp);
235f0bc4ed1SChristos Margiolis intr_restore(reg);
236f0bc4ed1SChristos Margiolis }
237f0bc4ed1SChristos Margiolis
238f0bc4ed1SChristos Margiolis static void
kinst_set_disp8(struct kinst_probe * kp,uint8_t byte)239f0bc4ed1SChristos Margiolis kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
240f0bc4ed1SChristos Margiolis {
241f0bc4ed1SChristos Margiolis kp->kp_md.disp = (int64_t)(int8_t)byte;
242f0bc4ed1SChristos Margiolis }
243f0bc4ed1SChristos Margiolis
244f0bc4ed1SChristos Margiolis static void
kinst_set_disp32(struct kinst_probe * kp,uint8_t * bytes)245f0bc4ed1SChristos Margiolis kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
246f0bc4ed1SChristos Margiolis {
247f0bc4ed1SChristos Margiolis int32_t disp32;
248f0bc4ed1SChristos Margiolis
249f0bc4ed1SChristos Margiolis memcpy(&disp32, bytes, sizeof(disp32));
250f0bc4ed1SChristos Margiolis kp->kp_md.disp = (int64_t)disp32;
251f0bc4ed1SChristos Margiolis }
252f0bc4ed1SChristos Margiolis
253f0bc4ed1SChristos Margiolis /*
254f0bc4ed1SChristos Margiolis * Set up all of the state needed to faithfully execute a probed instruction.
255f0bc4ed1SChristos Margiolis *
256f0bc4ed1SChristos Margiolis * In the simple case, we copy the instruction unmodified to a per-thread
257f0bc4ed1SChristos Margiolis * trampoline, wherein it is followed by a jump back to the original code.
258f0bc4ed1SChristos Margiolis * - Instructions can have %rip as an operand:
259f0bc4ed1SChristos Margiolis * - with %rip-relative addressing encoded in ModR/M, or
260f0bc4ed1SChristos Margiolis * - implicitly as a part of the instruction definition (jmp, call).
261f0bc4ed1SChristos Margiolis * - Call instructions (which may be %rip-relative) need to push the correct
262f0bc4ed1SChristos Margiolis * return address onto the stack.
263f0bc4ed1SChristos Margiolis *
264f0bc4ed1SChristos Margiolis * Call instructions are simple enough to be emulated in software, so we simply
265f0bc4ed1SChristos Margiolis * do not use the trampoline mechanism in that case. kinst_invop() will compute
266f0bc4ed1SChristos Margiolis * the branch target using the address info computed here (register operands and
267f0bc4ed1SChristos Margiolis * displacement).
268f0bc4ed1SChristos Margiolis *
269f0bc4ed1SChristos Margiolis * %rip-relative operands encoded using the ModR/M byte always use a 32-bit
270f0bc4ed1SChristos Margiolis * displacement; when populating the trampoline the displacement is adjusted to
271f0bc4ed1SChristos Margiolis * be relative to the trampoline address. Trampolines are always allocated
272f0bc4ed1SChristos Margiolis * above KERNBASE for this reason.
273f0bc4ed1SChristos Margiolis *
274f0bc4ed1SChristos Margiolis * For other %rip-relative operands (just jumps) we take the same approach.
275f0bc4ed1SChristos Margiolis * Instructions which specify an 8-bit displacement must be rewritten to use a
276f0bc4ed1SChristos Margiolis * 32-bit displacement.
277f0bc4ed1SChristos Margiolis */
278f0bc4ed1SChristos Margiolis static int
kinst_instr_dissect(struct kinst_probe * kp,uint8_t ** instr)279f0bc4ed1SChristos Margiolis kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
280f0bc4ed1SChristos Margiolis {
281f0bc4ed1SChristos Margiolis struct kinst_probe_md *kpmd;
282f0bc4ed1SChristos Margiolis dis86_t d86;
283f0bc4ed1SChristos Margiolis uint8_t *bytes, modrm, rex;
284f0bc4ed1SChristos Margiolis int dispoff, i, ilen, opcidx;
285f0bc4ed1SChristos Margiolis
286f0bc4ed1SChristos Margiolis kpmd = &kp->kp_md;
287f0bc4ed1SChristos Margiolis
288f0bc4ed1SChristos Margiolis d86.d86_data = instr;
289ff624eb6SChristos Margiolis d86.d86_get_byte = dtrace_dis_get_byte;
290f0bc4ed1SChristos Margiolis d86.d86_check_func = NULL;
291f0bc4ed1SChristos Margiolis if (dtrace_disx86(&d86, SIZE64) != 0) {
292f0bc4ed1SChristos Margiolis KINST_LOG("failed to disassemble instruction at: %p", *instr);
293f0bc4ed1SChristos Margiolis return (EINVAL);
294f0bc4ed1SChristos Margiolis }
295f0bc4ed1SChristos Margiolis bytes = d86.d86_bytes;
296f0bc4ed1SChristos Margiolis kpmd->instlen = kpmd->tinstlen = d86.d86_len;
297f0bc4ed1SChristos Margiolis
298f0bc4ed1SChristos Margiolis /*
299f0bc4ed1SChristos Margiolis * Skip over prefixes, save REX.
300f0bc4ed1SChristos Margiolis */
301f0bc4ed1SChristos Margiolis rex = 0;
302f0bc4ed1SChristos Margiolis for (i = 0; i < kpmd->instlen; i++) {
303f0bc4ed1SChristos Margiolis switch (bytes[i]) {
304f0bc4ed1SChristos Margiolis case 0xf0 ... 0xf3:
305f0bc4ed1SChristos Margiolis /* group 1 */
306f0bc4ed1SChristos Margiolis continue;
307f0bc4ed1SChristos Margiolis case 0x26:
308f0bc4ed1SChristos Margiolis case 0x2e:
309f0bc4ed1SChristos Margiolis case 0x36:
310f0bc4ed1SChristos Margiolis case 0x3e:
311f0bc4ed1SChristos Margiolis case 0x64:
312f0bc4ed1SChristos Margiolis case 0x65:
313f0bc4ed1SChristos Margiolis /* group 2 */
314f0bc4ed1SChristos Margiolis continue;
315f0bc4ed1SChristos Margiolis case 0x66:
316f0bc4ed1SChristos Margiolis /* group 3 */
317f0bc4ed1SChristos Margiolis continue;
318f0bc4ed1SChristos Margiolis case 0x67:
319f0bc4ed1SChristos Margiolis /* group 4 */
320f0bc4ed1SChristos Margiolis continue;
321f0bc4ed1SChristos Margiolis case 0x40 ... 0x4f:
322f0bc4ed1SChristos Margiolis /* REX */
323f0bc4ed1SChristos Margiolis rex = bytes[i];
324f0bc4ed1SChristos Margiolis continue;
325f0bc4ed1SChristos Margiolis }
326f0bc4ed1SChristos Margiolis break;
327f0bc4ed1SChristos Margiolis }
328f0bc4ed1SChristos Margiolis KASSERT(i < kpmd->instlen,
329f0bc4ed1SChristos Margiolis ("%s: failed to disassemble instruction at %p", __func__, bytes));
330f0bc4ed1SChristos Margiolis opcidx = i;
331f0bc4ed1SChristos Margiolis
332f0bc4ed1SChristos Margiolis /*
333f0bc4ed1SChristos Margiolis * Identify instructions of interest by opcode: calls and jumps.
334f0bc4ed1SChristos Margiolis * Extract displacements.
335f0bc4ed1SChristos Margiolis */
336f0bc4ed1SChristos Margiolis dispoff = -1;
337f0bc4ed1SChristos Margiolis switch (bytes[opcidx]) {
338f0bc4ed1SChristos Margiolis case 0x0f:
339f0bc4ed1SChristos Margiolis switch (bytes[opcidx + 1]) {
340f0bc4ed1SChristos Margiolis case 0x80 ... 0x8f:
341f0bc4ed1SChristos Margiolis /* conditional jmp near */
342f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
343f0bc4ed1SChristos Margiolis dispoff = opcidx + 2;
344f0bc4ed1SChristos Margiolis kinst_set_disp32(kp, &bytes[dispoff]);
345f0bc4ed1SChristos Margiolis break;
346f0bc4ed1SChristos Margiolis }
347f0bc4ed1SChristos Margiolis break;
348f0bc4ed1SChristos Margiolis case 0xe3:
349f0bc4ed1SChristos Margiolis /*
350f0bc4ed1SChristos Margiolis * There is no straightforward way to translate this instruction
351f0bc4ed1SChristos Margiolis * to use a 32-bit displacement. Fortunately, it is rarely
352f0bc4ed1SChristos Margiolis * used.
353f0bc4ed1SChristos Margiolis */
354f0bc4ed1SChristos Margiolis return (EINVAL);
355f0bc4ed1SChristos Margiolis case 0x70 ... 0x7f:
356f0bc4ed1SChristos Margiolis /* conditional jmp short */
357f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
358f0bc4ed1SChristos Margiolis dispoff = opcidx + 1;
359f0bc4ed1SChristos Margiolis kinst_set_disp8(kp, bytes[dispoff]);
360f0bc4ed1SChristos Margiolis break;
361f0bc4ed1SChristos Margiolis case 0xe9:
362f0bc4ed1SChristos Margiolis /* unconditional jmp near */
363f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
364f0bc4ed1SChristos Margiolis dispoff = opcidx + 1;
365f0bc4ed1SChristos Margiolis kinst_set_disp32(kp, &bytes[dispoff]);
366f0bc4ed1SChristos Margiolis break;
367f0bc4ed1SChristos Margiolis case 0xeb:
368f0bc4ed1SChristos Margiolis /* unconditional jmp short */
369f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
370f0bc4ed1SChristos Margiolis dispoff = opcidx + 1;
371f0bc4ed1SChristos Margiolis kinst_set_disp8(kp, bytes[dispoff]);
372f0bc4ed1SChristos Margiolis break;
373f0bc4ed1SChristos Margiolis case 0xe8:
374f0bc4ed1SChristos Margiolis case 0x9a:
375f0bc4ed1SChristos Margiolis /* direct call */
376f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
377f0bc4ed1SChristos Margiolis dispoff = opcidx + 1;
378f0bc4ed1SChristos Margiolis kinst_set_disp32(kp, &bytes[dispoff]);
379f0bc4ed1SChristos Margiolis break;
380f0bc4ed1SChristos Margiolis case 0xff:
381f0bc4ed1SChristos Margiolis KASSERT(d86.d86_got_modrm,
382f0bc4ed1SChristos Margiolis ("no ModR/M byte for instr at %p", *instr - kpmd->instlen));
383f0bc4ed1SChristos Margiolis switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
384f0bc4ed1SChristos Margiolis case 0x02:
385f0bc4ed1SChristos Margiolis case 0x03:
386f0bc4ed1SChristos Margiolis /* indirect call */
387f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_CALL;
388f0bc4ed1SChristos Margiolis break;
389f0bc4ed1SChristos Margiolis case 0x04:
390f0bc4ed1SChristos Margiolis case 0x05:
391f0bc4ed1SChristos Margiolis /* indirect jump */
392f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_JMP;
393f0bc4ed1SChristos Margiolis break;
394f0bc4ed1SChristos Margiolis }
395f0bc4ed1SChristos Margiolis }
396f0bc4ed1SChristos Margiolis
397f0bc4ed1SChristos Margiolis /*
398f0bc4ed1SChristos Margiolis * If there's a ModR/M byte, we need to check it to see if the operand
399f0bc4ed1SChristos Margiolis * is %rip-relative, and rewrite the displacement if so. If not, we
400f0bc4ed1SChristos Margiolis * might still have to extract operand info if this is a call
401f0bc4ed1SChristos Margiolis * instruction.
402f0bc4ed1SChristos Margiolis */
403f0bc4ed1SChristos Margiolis if (d86.d86_got_modrm) {
404f0bc4ed1SChristos Margiolis uint8_t mod, rm, sib;
405f0bc4ed1SChristos Margiolis
406f0bc4ed1SChristos Margiolis kpmd->reg1 = kpmd->reg2 = -1;
407f0bc4ed1SChristos Margiolis
408f0bc4ed1SChristos Margiolis modrm = bytes[d86.d86_rmindex];
409f0bc4ed1SChristos Margiolis mod = KINST_MODRM_MOD(modrm);
410f0bc4ed1SChristos Margiolis rm = KINST_MODRM_RM(modrm);
411f0bc4ed1SChristos Margiolis if (mod == 0 && rm == 5) {
412f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_RIPREL;
413f0bc4ed1SChristos Margiolis dispoff = d86.d86_rmindex + 1;
414f0bc4ed1SChristos Margiolis kinst_set_disp32(kp, &bytes[dispoff]);
415f0bc4ed1SChristos Margiolis } else if ((kpmd->flags & KINST_F_CALL) != 0) {
416f0bc4ed1SChristos Margiolis bool havesib;
417f0bc4ed1SChristos Margiolis
418f0bc4ed1SChristos Margiolis havesib = (mod != 3 && rm == 4);
419f0bc4ed1SChristos Margiolis dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
420f0bc4ed1SChristos Margiolis if (mod == 1)
421f0bc4ed1SChristos Margiolis kinst_set_disp8(kp, bytes[dispoff]);
422f0bc4ed1SChristos Margiolis else if (mod == 2)
423f0bc4ed1SChristos Margiolis kinst_set_disp32(kp, &bytes[dispoff]);
424f0bc4ed1SChristos Margiolis else if (mod == 3)
425f0bc4ed1SChristos Margiolis kpmd->flags |= KINST_F_MOD_DIRECT;
426f0bc4ed1SChristos Margiolis
427f0bc4ed1SChristos Margiolis if (havesib) {
428f0bc4ed1SChristos Margiolis sib = bytes[d86.d86_rmindex + 1];
429f0bc4ed1SChristos Margiolis if (KINST_SIB_BASE(sib) != 5) {
430f0bc4ed1SChristos Margiolis kpmd->reg1 = KINST_SIB_BASE(sib) |
431f0bc4ed1SChristos Margiolis (KINST_REX_B(rex) << 3);
432f0bc4ed1SChristos Margiolis }
433f0bc4ed1SChristos Margiolis kpmd->scale = KINST_SIB_SCALE(sib);
434f0bc4ed1SChristos Margiolis kpmd->reg2 = KINST_SIB_INDEX(sib) |
435f0bc4ed1SChristos Margiolis (KINST_REX_X(rex) << 3);
436f0bc4ed1SChristos Margiolis } else {
437f0bc4ed1SChristos Margiolis kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
438f0bc4ed1SChristos Margiolis }
439f0bc4ed1SChristos Margiolis }
440f0bc4ed1SChristos Margiolis }
441f0bc4ed1SChristos Margiolis
442f0bc4ed1SChristos Margiolis /*
443f0bc4ed1SChristos Margiolis * Calls are emulated in software; once operands are decoded we have
444f0bc4ed1SChristos Margiolis * nothing else to do.
445f0bc4ed1SChristos Margiolis */
446f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_CALL) != 0)
447f0bc4ed1SChristos Margiolis return (0);
448f0bc4ed1SChristos Margiolis
449f0bc4ed1SChristos Margiolis /*
450f0bc4ed1SChristos Margiolis * Allocate and populate an instruction trampoline template.
451f0bc4ed1SChristos Margiolis *
452f0bc4ed1SChristos Margiolis * Position-independent instructions can simply be copied, but
453f0bc4ed1SChristos Margiolis * position-dependent instructions require some surgery: jump
454f0bc4ed1SChristos Margiolis * instructions with an 8-bit displacement need to be converted to use a
455f0bc4ed1SChristos Margiolis * 32-bit displacement, and the adjusted displacement needs to be
456f0bc4ed1SChristos Margiolis * computed.
457f0bc4ed1SChristos Margiolis */
458f0bc4ed1SChristos Margiolis ilen = kpmd->instlen;
459f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_RIPREL) != 0) {
460f0bc4ed1SChristos Margiolis if ((kpmd->flags & KINST_F_JMP) == 0 ||
461f0bc4ed1SChristos Margiolis bytes[opcidx] == 0x0f ||
462f0bc4ed1SChristos Margiolis bytes[opcidx] == 0xe9 ||
463f0bc4ed1SChristos Margiolis bytes[opcidx] == 0xff) {
464f0bc4ed1SChristos Margiolis memcpy(kpmd->template, bytes, dispoff);
465f0bc4ed1SChristos Margiolis memcpy(&kpmd->template[dispoff + 4],
466f0bc4ed1SChristos Margiolis &bytes[dispoff + 4], ilen - (dispoff + 4));
467f0bc4ed1SChristos Margiolis kpmd->dispoff = dispoff;
468f0bc4ed1SChristos Margiolis } else if (bytes[opcidx] == 0xeb) {
469f0bc4ed1SChristos Margiolis memcpy(kpmd->template, bytes, opcidx);
470f0bc4ed1SChristos Margiolis kpmd->template[opcidx] = 0xe9;
471f0bc4ed1SChristos Margiolis kpmd->dispoff = opcidx + 1;
472f0bc4ed1SChristos Margiolis
473f0bc4ed1SChristos Margiolis /* Instruction length changes from 2 to 5. */
474f0bc4ed1SChristos Margiolis kpmd->tinstlen = 5;
475f0bc4ed1SChristos Margiolis kpmd->disp -= 3;
476f0bc4ed1SChristos Margiolis } else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
477f0bc4ed1SChristos Margiolis memcpy(kpmd->template, bytes, opcidx);
478f0bc4ed1SChristos Margiolis kpmd->template[opcidx] = 0x0f;
479f0bc4ed1SChristos Margiolis kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
480f0bc4ed1SChristos Margiolis kpmd->dispoff = opcidx + 2;
481f0bc4ed1SChristos Margiolis
482f0bc4ed1SChristos Margiolis /* Instruction length changes from 2 to 6. */
483f0bc4ed1SChristos Margiolis kpmd->tinstlen = 6;
484f0bc4ed1SChristos Margiolis kpmd->disp -= 4;
485f0bc4ed1SChristos Margiolis } else {
486f0bc4ed1SChristos Margiolis panic("unhandled opcode %#x", bytes[opcidx]);
487f0bc4ed1SChristos Margiolis }
488f0bc4ed1SChristos Margiolis } else {
489f0bc4ed1SChristos Margiolis memcpy(kpmd->template, bytes, ilen);
490f0bc4ed1SChristos Margiolis }
491f0bc4ed1SChristos Margiolis
492f0bc4ed1SChristos Margiolis return (0);
493f0bc4ed1SChristos Margiolis }
494f0bc4ed1SChristos Margiolis
495f0bc4ed1SChristos Margiolis int
kinst_make_probe(linker_file_t lf,int symindx,linker_symval_t * symval,void * opaque)496f0bc4ed1SChristos Margiolis kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
497f0bc4ed1SChristos Margiolis void *opaque)
498f0bc4ed1SChristos Margiolis {
499f0bc4ed1SChristos Margiolis struct kinst_probe *kp;
500f0bc4ed1SChristos Margiolis dtrace_kinst_probedesc_t *pd;
501f0bc4ed1SChristos Margiolis const char *func;
50284d7fe4aSMark Johnston int error, instrsize, n, off;
503ea89133dSChristos Margiolis uint8_t *instr, *limit, *tmp;
504*bbe8195bSChristos Margiolis bool push_found;
505f0bc4ed1SChristos Margiolis
506f0bc4ed1SChristos Margiolis pd = opaque;
507f0bc4ed1SChristos Margiolis func = symval->name;
5089c80ad68SChristos Margiolis if (kinst_excluded(func))
5099c80ad68SChristos Margiolis return (0);
5109c80ad68SChristos Margiolis if (strcmp(func, pd->kpd_func) != 0)
511f0bc4ed1SChristos Margiolis return (0);
512f0bc4ed1SChristos Margiolis
513f0bc4ed1SChristos Margiolis instr = (uint8_t *)symval->value;
514f0bc4ed1SChristos Margiolis limit = (uint8_t *)symval->value + symval->size;
515f0bc4ed1SChristos Margiolis if (instr >= limit)
516f0bc4ed1SChristos Margiolis return (0);
517f0bc4ed1SChristos Margiolis
518f0bc4ed1SChristos Margiolis /*
519ea89133dSChristos Margiolis * Refuse to instrument functions lacking the usual frame pointer
520ea89133dSChristos Margiolis * manipulations since they might correspond to exception handlers.
521f0bc4ed1SChristos Margiolis */
522ea89133dSChristos Margiolis tmp = instr;
523*bbe8195bSChristos Margiolis push_found = false;
524ea89133dSChristos Margiolis while (tmp < limit) {
525*bbe8195bSChristos Margiolis /*
526*bbe8195bSChristos Margiolis * Checking for 'pop %rbp' as well makes the filtering too
527*bbe8195bSChristos Margiolis * strict as it would skip functions that never return (e.g.,
528*bbe8195bSChristos Margiolis * vnlru_proc()).
529*bbe8195bSChristos Margiolis */
530*bbe8195bSChristos Margiolis if (*tmp == KINST_PUSHL_RBP) {
531ea89133dSChristos Margiolis push_found = true;
532ea89133dSChristos Margiolis break;
533*bbe8195bSChristos Margiolis }
534ea89133dSChristos Margiolis tmp += dtrace_instr_size(tmp);
535ea89133dSChristos Margiolis }
536*bbe8195bSChristos Margiolis if (!push_found)
537f0bc4ed1SChristos Margiolis return (0);
538f0bc4ed1SChristos Margiolis
539f0bc4ed1SChristos Margiolis n = 0;
540f0bc4ed1SChristos Margiolis while (instr < limit) {
54184d7fe4aSMark Johnston instrsize = dtrace_instr_size(instr);
542f0bc4ed1SChristos Margiolis off = (int)(instr - (uint8_t *)symval->value);
543f0bc4ed1SChristos Margiolis if (pd->kpd_off != -1 && off != pd->kpd_off) {
54484d7fe4aSMark Johnston instr += instrsize;
54584d7fe4aSMark Johnston continue;
54684d7fe4aSMark Johnston }
54784d7fe4aSMark Johnston
54884d7fe4aSMark Johnston /*
54984d7fe4aSMark Johnston * Check for instructions which may enable interrupts. Such
55084d7fe4aSMark Johnston * instructions are tricky to trace since it is unclear whether
55184d7fe4aSMark Johnston * to use the per-thread or per-CPU trampolines. Since they are
55284d7fe4aSMark Johnston * rare, we don't bother to implement special handling for them.
55384d7fe4aSMark Johnston *
55484d7fe4aSMark Johnston * If the caller specified an offset, return an error, otherwise
55584d7fe4aSMark Johnston * silently ignore the instruction so that it remains possible
55684d7fe4aSMark Johnston * to enable all instructions in a function.
55784d7fe4aSMark Johnston */
55884d7fe4aSMark Johnston if (instrsize == 1 &&
55984d7fe4aSMark Johnston (instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
56084d7fe4aSMark Johnston if (pd->kpd_off != -1)
56184d7fe4aSMark Johnston return (EINVAL);
56284d7fe4aSMark Johnston instr += instrsize;
563f0bc4ed1SChristos Margiolis continue;
564f0bc4ed1SChristos Margiolis }
565f0bc4ed1SChristos Margiolis
566f0bc4ed1SChristos Margiolis /*
567f0bc4ed1SChristos Margiolis * Prevent separate dtrace(1) instances from creating copies of
568f0bc4ed1SChristos Margiolis * the same probe.
569f0bc4ed1SChristos Margiolis */
570f0bc4ed1SChristos Margiolis LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
571f0bc4ed1SChristos Margiolis if (strcmp(kp->kp_func, func) == 0 &&
572f0bc4ed1SChristos Margiolis strtol(kp->kp_name, NULL, 10) == off)
573f0bc4ed1SChristos Margiolis return (0);
574f0bc4ed1SChristos Margiolis }
575f0bc4ed1SChristos Margiolis if (++n > KINST_PROBETAB_MAX) {
576f0bc4ed1SChristos Margiolis KINST_LOG("probe list full: %d entries", n);
577f0bc4ed1SChristos Margiolis return (ENOMEM);
578f0bc4ed1SChristos Margiolis }
579f0bc4ed1SChristos Margiolis kp = malloc(sizeof(struct kinst_probe), M_KINST,
580f0bc4ed1SChristos Margiolis M_WAITOK | M_ZERO);
581f0bc4ed1SChristos Margiolis kp->kp_func = func;
582f0bc4ed1SChristos Margiolis snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
583f0bc4ed1SChristos Margiolis kp->kp_savedval = *instr;
584f0bc4ed1SChristos Margiolis kp->kp_patchval = KINST_PATCHVAL;
585f0bc4ed1SChristos Margiolis kp->kp_patchpoint = instr;
586f0bc4ed1SChristos Margiolis
587f0bc4ed1SChristos Margiolis error = kinst_instr_dissect(kp, &instr);
588f0bc4ed1SChristos Margiolis if (error != 0)
589f0bc4ed1SChristos Margiolis return (error);
590f0bc4ed1SChristos Margiolis
591f0bc4ed1SChristos Margiolis kinst_probe_create(kp, lf);
592f0bc4ed1SChristos Margiolis }
593f0bc4ed1SChristos Margiolis
594f0bc4ed1SChristos Margiolis return (0);
595f0bc4ed1SChristos Margiolis }
59684d7fe4aSMark Johnston
59784d7fe4aSMark Johnston int
kinst_md_init(void)59884d7fe4aSMark Johnston kinst_md_init(void)
59984d7fe4aSMark Johnston {
60084d7fe4aSMark Johnston uint8_t *tramp;
60184d7fe4aSMark Johnston int cpu;
60284d7fe4aSMark Johnston
60384d7fe4aSMark Johnston CPU_FOREACH(cpu) {
60484d7fe4aSMark Johnston tramp = kinst_trampoline_alloc(M_WAITOK);
60584d7fe4aSMark Johnston if (tramp == NULL)
60684d7fe4aSMark Johnston return (ENOMEM);
60784d7fe4aSMark Johnston DPCPU_ID_SET(cpu, intr_tramp, tramp);
60884d7fe4aSMark Johnston }
60984d7fe4aSMark Johnston
61084d7fe4aSMark Johnston return (0);
61184d7fe4aSMark Johnston }
61284d7fe4aSMark Johnston
61384d7fe4aSMark Johnston void
kinst_md_deinit(void)61484d7fe4aSMark Johnston kinst_md_deinit(void)
61584d7fe4aSMark Johnston {
61684d7fe4aSMark Johnston uint8_t *tramp;
61784d7fe4aSMark Johnston int cpu;
61884d7fe4aSMark Johnston
61984d7fe4aSMark Johnston CPU_FOREACH(cpu) {
62084d7fe4aSMark Johnston tramp = DPCPU_ID_GET(cpu, intr_tramp);
62184d7fe4aSMark Johnston if (tramp != NULL) {
622b999f245SChristos Margiolis kinst_trampoline_dealloc(tramp);
62384d7fe4aSMark Johnston DPCPU_ID_SET(cpu, intr_tramp, NULL);
62484d7fe4aSMark Johnston }
62584d7fe4aSMark Johnston }
62684d7fe4aSMark Johnston }
6279c80ad68SChristos Margiolis
/*
 * Exclude machine-dependent functions that are not safe-to-trace.
 *
 * No function is excluded here: the result is false for every name.
 */
bool
kinst_md_excluded(const char *name)
{
	return (false);
}
636