xref: /freebsd/sys/cddl/dev/kinst/aarch64/kinst_isa.c (revision d37eb51047221dc3322b34db1038ff3aa533883f)
1 /*
2  * SPDX-License-Identifier: CDDL 1.0
3  *
4  * Copyright (c) 2022 Christos Margiolis <christos@FreeBSD.org>
5  * Copyright (c) 2022 Mark Johnston <markj@FreeBSD.org>
6  * Copyright (c) 2023 The FreeBSD Foundation
7  *
8  * Portions of this software were developed by Christos Margiolis
9  * <christos@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
10  */
11 
12 #include <sys/param.h>
13 
14 #include <sys/dtrace.h>
15 #include <cddl/dev/dtrace/dtrace_cddl.h>
16 
17 #include "kinst.h"
18 
19 DPCPU_DEFINE_STATIC(struct kinst_cpu_state, kinst_state);
20 
21 static int
22 kinst_emulate(struct trapframe *frame, const struct kinst_probe *kp)
23 {
24 	kinst_patchval_t instr = kp->kp_savedval;
25 	uint64_t imm;
26 	uint8_t cond, reg, bitpos;
27 	bool res;
28 
29 	if (((instr >> 24) & 0x1f) == 0b10000) {
30 		/* adr/adrp */
31 		reg = instr & 0x1f;
32 		imm = (instr >> 29) & 0x3;
33 		imm |= ((instr >> 5) & 0x0007ffff) << 2;
34 		if (((instr >> 31) & 0x1) == 0) {
35 			/* adr */
36 			if (imm & 0x0000000000100000)
37 				imm |= 0xfffffffffff00000;
38 			frame->tf_x[reg] = frame->tf_elr + imm;
39 		} else {
40 			/* adrp */
41 			imm <<= 12;
42 			if (imm & 0x0000000100000000)
43 				imm |= 0xffffffff00000000;
44 			frame->tf_x[reg] = (frame->tf_elr & ~0xfff) + imm;
45 		}
46 		frame->tf_elr += INSN_SIZE;
47 	} else if (((instr >> 26) & 0x3f) == 0b000101) {
48 		/* b */
49 		imm = instr & 0x03ffffff;
50 		if (imm & 0x0000000002000000)
51 			imm |= 0xfffffffffe000000;
52 		frame->tf_elr += imm << 2;
53 	} else if (((instr >> 24) & 0xff) == 0b01010100) {
54 		/* b.cond */
55 		imm = (instr >> 5) & 0x0007ffff;
56 		if (imm & 0x0000000000040000)
57 			imm |= 0xfffffffffffc0000;
58 		cond = instr & 0xf;
59 		switch ((cond >> 1) & 0x7) {
60 		case 0b000:	/* eq/ne */
61 			res = (frame->tf_spsr & PSR_Z) != 0;
62 			break;
63 		case 0b001:	/* cs/cc */
64 			res = (frame->tf_spsr & PSR_C) != 0;
65 			break;
66 		case 0b010:	/* mi/pl */
67 			res = (frame->tf_spsr & PSR_N) != 0;
68 			break;
69 		case 0b011:	/* vs/vc */
70 			res = (frame->tf_spsr & PSR_V) != 0;
71 			break;
72 		case 0b100:	/* hi/ls */
73 			res = ((frame->tf_spsr & PSR_C) != 0) &&
74 			    ((frame->tf_spsr & PSR_Z) == 0);
75 			break;
76 		case 0b101:	/* ge/lt */
77 			res = ((frame->tf_spsr & PSR_N) != 0) ==
78 			    ((frame->tf_spsr & PSR_V) != 0);
79 			break;
80 		case 0b110:	/* gt/le */
81 			res = ((frame->tf_spsr & PSR_Z) == 0) &&
82 			    (((frame->tf_spsr & PSR_N) != 0) ==
83 			    ((frame->tf_spsr & PSR_V) != 0));
84 			break;
85 		case 0b111:	/* al */
86 			res = 1;
87 			break;
88 		}
89 		if ((cond & 0x1) && cond != 0b1111)
90 			res = !res;
91 		if (res)
92 			frame->tf_elr += imm << 2;
93 		else
94 			frame->tf_elr += INSN_SIZE;
95 	} else if (((instr >> 26) & 0x3f) == 0b100101) {
96 		/* bl */
97 		imm = instr & 0x03ffffff;
98 		if (imm & 0x0000000002000000)
99 			imm |= 0xfffffffffe000000;
100 		frame->tf_lr = frame->tf_elr + INSN_SIZE;
101 		frame->tf_elr += imm << 2;
102 	} else if (((instr >> 25) & 0x3f) == 0b011010) {
103 		/* cbnz/cbz */
104 		cond = (instr >> 24) & 0x1;
105 		reg = instr & 0x1f;
106 		imm = (instr >> 5) & 0x0007ffff;
107 		if (imm & 0x0000000000040000)
108 			imm |= 0xfffffffffffc0000;
109 		if (cond == 1 && frame->tf_x[reg] != 0)
110 			/* cbnz */
111 			frame->tf_elr += imm << 2;
112 		else if (cond == 0 && frame->tf_x[reg] == 0)
113 			/* cbz */
114 			frame->tf_elr += imm << 2;
115 		else
116 			frame->tf_elr += INSN_SIZE;
117 	} else if (((instr >> 25) & 0x3f) == 0b011011) {
118 		/* tbnz/tbz */
119 		cond = (instr >> 24) & 0x1;
120 		reg = instr & 0x1f;
121 		bitpos = (instr >> 19) & 0x1f;
122 		bitpos |= ((instr >> 31) & 0x1) << 5;
123 		imm = (instr >> 5) & 0x3fff;
124 		if (imm & 0x0000000000002000)
125 			imm |= 0xffffffffffffe000;
126 		if (cond == 1 && (frame->tf_x[reg] & (1 << bitpos)) != 0)
127 			/* tbnz */
128 			frame->tf_elr += imm << 2;
129 		else if (cond == 0 && (frame->tf_x[reg] & (1 << bitpos)) == 0)
130 			/* tbz */
131 			frame->tf_elr += imm << 2;
132 		else
133 			frame->tf_elr += INSN_SIZE;
134 	}
135 
136 	return (0);
137 }
138 
139 static int
140 kinst_jump_next_instr(struct trapframe *frame, const struct kinst_probe *kp)
141 {
142 	frame->tf_elr = (register_t)((const uint8_t *)kp->kp_patchpoint +
143 	    INSN_SIZE);
144 
145 	return (0);
146 }
147 
148 static void
149 kinst_trampoline_populate(struct kinst_probe *kp)
150 {
151 	static uint32_t bpt = KINST_PATCHVAL;
152 
153 	kinst_memcpy(kp->kp_tramp, &kp->kp_savedval, INSN_SIZE);
154 	kinst_memcpy(&kp->kp_tramp[INSN_SIZE], &bpt, INSN_SIZE);
155 
156 	cpu_icache_sync_range(kp->kp_tramp, KINST_TRAMP_SIZE);
157 }
158 
159 /*
160  * There are two ways by which an instruction is traced:
161  *
162  * - By using the trampoline.
163  * - By emulating it in software (see kinst_emulate()).
164  *
165  * The trampoline is used for instructions that can be copied and executed
166  * as-is without additional modification. However, instructions that use
167  * PC-relative addressing have to be emulated, because ARM64 doesn't allow
168  * encoding of large displacements in a single instruction, and since we cannot
169  * clobber a register in order to encode the two-instruction sequence needed to
170  * create large displacements, we cannot use the trampoline at all.
171  * Fortunately, the instructions are simple enough to be emulated in just a few
172  * lines of code.
173  *
174  * The problem discussed above also means that, unlike amd64, we cannot encode
175  * a far-jump back from the trampoline to the next instruction. The mechanism
176  * employed to achieve this functionality, is to use a breakpoint instead of a
177  * jump after the copied instruction. This breakpoint is detected and handled
178  * by kinst_invop(), which performs the jump back to the next instruction
179  * manually (see kinst_jump_next_instr()).
180  */
181 int
182 kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
183 {
184 	solaris_cpu_t *cpu;
185 	struct kinst_cpu_state *ks;
186 	const struct kinst_probe *kp;
187 
188 	ks = DPCPU_PTR(kinst_state);
189 
190 	/*
191 	 * Detect if the breakpoint was triggered by the trampoline, and
192 	 * manually set the PC to the next instruction.
193 	 */
194 	if (ks->state == KINST_PROBE_FIRED &&
195 	    addr == (uintptr_t)(ks->kp->kp_tramp + INSN_SIZE)) {
196 		/*
197 		 * Restore interrupts if they were enabled prior to the first
198 		 * breakpoint.
199 		 */
200 		if ((ks->status & PSR_I) == 0)
201 			frame->tf_spsr &= ~PSR_I;
202 		ks->state = KINST_PROBE_ARMED;
203 		return (kinst_jump_next_instr(frame, ks->kp));
204 	}
205 
206 	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
207 		if ((uintptr_t)kp->kp_patchpoint == addr)
208 			break;
209 	}
210 	if (kp == NULL)
211 		return (0);
212 
213 	cpu = &solaris_cpu[curcpu];
214 	cpu->cpu_dtrace_caller = addr;
215 	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
216 	cpu->cpu_dtrace_caller = 0;
217 
218 	if (kp->kp_md.emulate)
219 		return (kinst_emulate(frame, kp));
220 
221 	ks->state = KINST_PROBE_FIRED;
222 	ks->kp = kp;
223 
224 	/*
225 	 * Cache the current SPSR and clear interrupts for the duration
226 	 * of the double breakpoint.
227 	 */
228 	ks->status = frame->tf_spsr;
229 	frame->tf_spsr |= PSR_I;
230 	frame->tf_elr = (register_t)kp->kp_tramp;
231 
232 	return (0);
233 }
234 
235 void
236 kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
237 {
238 	void *addr;
239 
240 	if (!arm64_get_writable_addr(kp->kp_patchpoint, &addr))
241 		panic("%s: Unable to write new instruction", __func__);
242 	*(kinst_patchval_t *)addr = val;
243 	cpu_icache_sync_range(kp->kp_patchpoint, INSN_SIZE);
244 }
245 
246 static void
247 kinst_instr_dissect(struct kinst_probe *kp)
248 {
249 	struct kinst_probe_md *kpmd;
250 	kinst_patchval_t instr = kp->kp_savedval;
251 
252 	kpmd = &kp->kp_md;
253 	kpmd->emulate = false;
254 
255 	if (((instr >> 24) & 0x1f) == 0b10000)
256 		kpmd->emulate = true;	/* adr/adrp */
257 	else if (((instr >> 26) & 0x3f) == 0b000101)
258 		kpmd->emulate = true;	/* b */
259 	else if (((instr >> 24) & 0xff) == 0b01010100)
260 		kpmd->emulate = true;	/* b.cond */
261 	else if (((instr >> 26) & 0x3f) == 0b100101)
262 		kpmd->emulate = true;	/* bl */
263 	else if (((instr >> 25) & 0x3f) == 0b011010)
264 		kpmd->emulate = true;	/* cbnz/cbz */
265 	else if (((instr >> 25) & 0x3f) == 0b011011)
266 		kpmd->emulate = true;	/* tbnz/tbz */
267 
268 	if (!kpmd->emulate)
269 		kinst_trampoline_populate(kp);
270 }
271 
272 static bool
273 kinst_instr_ldx(kinst_patchval_t instr)
274 {
275 	if (((instr >> 22) & 0xff) == 0b00100001)
276 		return (true);
277 
278 	return (false);
279 }
280 
281 static bool
282 kinst_instr_stx(kinst_patchval_t instr)
283 {
284 	if (((instr >> 22) & 0xff) == 0b00100000)
285 		return (true);
286 
287 	return (false);
288 }
289 
290 int
291 kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
292     void *opaque)
293 {
294 	struct kinst_probe *kp;
295 	dtrace_kinst_probedesc_t *pd;
296 	const char *func;
297 	kinst_patchval_t *instr, *limit, *tmp;
298 	int n, off;
299 	bool ldxstx_block, found;
300 
301 	pd = opaque;
302 	func = symval->name;
303 
304 	if (kinst_excluded(func))
305 		return (0);
306 	if (strcmp(func, pd->kpd_func) != 0)
307 		return (0);
308 
309 	instr = (kinst_patchval_t *)(symval->value);
310 	limit = (kinst_patchval_t *)(symval->value + symval->size);
311 	if (instr >= limit)
312 		return (0);
313 
314 	tmp = instr;
315 
316 	/*
317 	 * Ignore any bti instruction at the start of the function
318 	 * we need to keep it there for any indirect branches calling
319 	 * the function on Armv8.5+
320 	 */
321 	if ((*tmp & BTI_MASK) == BTI_INSTR)
322 		tmp++;
323 
324 	/* Look for stp (pre-indexed) operation */
325 	found = false;
326 
327 	/*
328 	 * If the first instruction is a nop it's a specially marked
329 	 * asm function. We only support a nop first as it's not a normal
330 	 * part of the function prologue.
331 	 */
332 	if (*tmp == NOP_INSTR)
333 		found = true;
334 	for (; !found && tmp < limit; tmp++) {
335 		/*
336 		 * Functions start with "stp xt1, xt2, [xn, <const>]!" or
337 		 * "sub sp, sp, <const>".
338 		 *
339 		 * Sometimes the compiler will have a sub instruction that is
340 		 * not of the above type so don't stop if we see one.
341 		 */
342 		if ((*tmp & LDP_STP_MASK) == STP_64) {
343 			/*
344 			 * Assume any other store of this type means we are
345 			 * past the function prolog.
346 			 */
347 			if (((*tmp >> ADDR_SHIFT) & ADDR_MASK) == 31)
348 				found = true;
349 		} else if ((*tmp & SUB_MASK) == SUB_INSTR &&
350 		    ((*tmp >> SUB_RD_SHIFT) & SUB_R_MASK) == 31 &&
351 		    ((*tmp >> SUB_RN_SHIFT) & SUB_R_MASK) == 31)
352 			found = true;
353 	}
354 
355 	if (!found)
356 		return (0);
357 
358 	ldxstx_block = false;
359 	for (n = 0; instr < limit; instr++) {
360 		off = (int)((uint8_t *)instr - (uint8_t *)symval->value);
361 
362 		/*
363 		 * Skip LDX/STX blocks that contain atomic operations. If a
364 		 * breakpoint is placed in a LDX/STX block, we violate the
365 		 * operation and the loop might fail.
366 		 */
367 		if (kinst_instr_ldx(*instr))
368 			ldxstx_block = true;
369 		else if (kinst_instr_stx(*instr)) {
370 			ldxstx_block = false;
371 			continue;
372 		}
373 		if (ldxstx_block)
374 			continue;
375 
376 		/*
377 		 * XXX: Skip ADR and ADRP instructions. The arm64 exception
378 		 * handler has a micro-optimization where it doesn't restore
379 		 * callee-saved registers when returning from exceptions in
380 		 * EL1. This results in a panic when the kinst emulation code
381 		 * modifies one of those registers.
382 		 */
383 		if (((*instr >> 24) & 0x1f) == 0b10000)
384 			continue;
385 
386 		if (pd->kpd_off != -1 && off != pd->kpd_off)
387 			continue;
388 
389 		/*
390 		 * Prevent separate dtrace(1) instances from creating copies of
391 		 * the same probe.
392 		 */
393 		LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
394 			if (strcmp(kp->kp_func, func) == 0 &&
395 			    strtol(kp->kp_name, NULL, 10) == off)
396 				return (0);
397 		}
398 		if (++n > KINST_PROBETAB_MAX) {
399 			KINST_LOG("probe list full: %d entries", n);
400 			return (ENOMEM);
401 		}
402 		kp = malloc(sizeof(struct kinst_probe), M_KINST,
403 		    M_WAITOK | M_ZERO);
404 		kp->kp_func = func;
405 		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
406 		kp->kp_patchpoint = instr;
407 		kp->kp_savedval = *instr;
408 		kp->kp_patchval = KINST_PATCHVAL;
409 		if ((kp->kp_tramp = kinst_trampoline_alloc(M_WAITOK)) == NULL) {
410 			KINST_LOG("cannot allocate trampoline for %p", instr);
411 			return (ENOMEM);
412 		}
413 
414 		kinst_instr_dissect(kp);
415 		kinst_probe_create(kp, lf);
416 	}
417 	if (ldxstx_block)
418 		KINST_LOG("warning: unterminated LDX/STX block");
419 
420 	return (0);
421 }
422 
423 int
424 kinst_md_init(void)
425 {
426 	struct kinst_cpu_state *ks;
427 	int cpu;
428 
429 	CPU_FOREACH(cpu) {
430 		ks = DPCPU_PTR(kinst_state);
431 		ks->state = KINST_PROBE_ARMED;
432 	}
433 
434 	return (0);
435 }
436 
/*
 * Machine-dependent teardown hook.  Intentionally empty: kinst_md_init()
 * only writes per-CPU state and acquires nothing that needs releasing.
 */
void
kinst_md_deinit(void)
{
	/* Nothing to do. */
}
441 
/*
 * Exclude machine-dependent functions that are not safe-to-trace.
 *
 * Returns true when `name` exactly matches one of the listed function names,
 * false otherwise.
 */
bool
kinst_md_excluded(const char *name)
{
	static const char *const unsafe[] = {
		"handle_el1h_sync",
		"do_el1h_sync",
	};
	size_t i;

	for (i = 0; i < sizeof(unsafe) / sizeof(unsafe[0]); i++) {
		if (strcmp(name, unsafe[i]) == 0)
			return (true);
	}

	return (false);
}
454