xref: /freebsd/sys/arm64/arm64/disassem.c (revision 0bc2abddc8d4abb89a210f2bb113e9e7c2d4ce18)
1 /*-
2  * Copyright (c) 2016 Cavium
3  * All rights reserved.
4  *
5  * This software was developed by Semihalf.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include <sys/param.h>
32 
33 #include <sys/systm.h>
34 #include <machine/disassem.h>
35 #include <machine/armreg.h>
36 #include <ddb/ddb.h>
37 
/* Size of a token name buffer; names must leave room for the final NUL. */
#define	ARM64_MAX_TOKEN_LEN	8
/* Number of operand tokens that can be stored per instruction format. */
#define	ARM64_MAX_TOKEN_CNT	10

/* Location and width of the op(31:30) field read directly from the opcode. */
#define	ARM_INSN_SIZE_OFFSET	30
#define	ARM_INSN_SIZE_MASK	0x3

/*
 * Printing modifiers kept in the special_ops member of an instruction
 * table entry.
 */
#define	OP_SIGN_EXT	(1UL << 0)	/* Immediate is a signed value */
#define	OP_LITERAL	(1UL << 1)	/* Immediate is an offset from the insn address */
#define	OP_MULT_4	(1UL << 2)	/* Immediate is counted in 4-byte units */
#define	OP_SF32		(1UL << 3)	/* Always use 32-bit register names */
#define	OP_SF_INV	(1UL << 6)	/* SF is inverted (1 selects 32-bit access) */
50 
/*
 * Names of the 32-bit views of the general purpose registers, indexed by
 * the 5-bit register number taken from the opcode.  Both the strings and
 * the table itself are read-only.
 */
static const char * const w_reg[] = {
	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
};
57 
/*
 * Names of the 64-bit general purpose registers, indexed by the 5-bit
 * register number taken from the opcode.  Both the strings and the table
 * itself are read-only.
 */
static const char * const x_reg[] = {
	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
	"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
};
64 
/*
 * Shift mnemonics selected by a 2-bit SHIFT field; the last value is
 * printed as "RSV".  Both the strings and the table itself are read-only.
 */
static const char * const shift_2[] = {
	"LSL", "LSR", "ASR", "RSV"
};
68 
69 /*
70  * Structure representing single token (operand) inside instruction.
71  * name   - name of operand
72  * pos    - position within the instruction (in bits)
73  * len    - operand length (in bits)
74  */
75 struct arm64_insn_token {
76 	char name[ARM64_MAX_TOKEN_LEN];
77 	int pos;
78 	int len;
79 };
80 
/*
 * Generic operand layouts.  Each instruction table entry names one of
 * these to choose how its operands are printed.
 */
enum arm64_format_type {
	/*
	 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
	 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
	 */
	TYPE_01 = 0,
	/*
	 * OP <RT>, [<RN>, #<imm>]{!} SF32/64
	 * OP <RT>, [<RN>], #<imm>{!} SF32/64
	 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
	 */
	TYPE_02 = 1,
	/*
	 * OP <RT>, #imm SF32/64
	 */
	TYPE_03 = 2,
};
92 
93 /*
94  * Structure representing single parsed instruction format.
95  * name   - opcode name
96  * format - opcode format in a human-readable way
97  * type   - syntax type for printing
98  * special_ops  - special options passed to a printer (if any)
99  * mask   - bitmask for instruction matching
100  * pattern      - pattern to look for
101  * tokens - array of tokens (operands) inside instruction
102  */
103 struct arm64_insn {
104 	char* name;
105 	char* format;
106 	enum arm64_format_type type;
107 	uint64_t special_ops;
108 	uint32_t mask;
109 	uint32_t pattern;
110 	struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
111 };
112 
113 /*
114  * Specify instruction opcode format in a human-readable way. Use notation
115  * obtained from ARM Architecture Reference Manual for ARMv8-A.
116  *
117  * Format string description:
118  *  Each group must be separated by "|". Group made of 0/1 is used to
119  *  generate mask and pattern for instruction matching. Groups containing
120  *  an operand token (in format NAME(length_bits)) are used to retrieve any
121  *  operand data from the instruction. Names here must be meaningful
122  *  and match the one described in the Manual.
123  *
124  * Token description:
125  * SF     - "0" represents 32-bit access, "1" represents 64-bit access
126  * SHIFT  - type of shift (instruction dependent)
127  * IMM    - immediate value
128  * Rx     - register number
129  * OPTION - command specific options
130  * SCALE  - scaling of immediate value
131  */
132 static struct arm64_insn arm64_i[] = {
133 	{ "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
134 	    TYPE_01, 0 },
135 	{ "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
136 	    TYPE_01, 0 },
137 	{ "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
138 	    TYPE_01, 0 },
139 	{ "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
140 	    TYPE_02, OP_SIGN_EXT },		/* ldr immediate post/pre index */
141 	{ "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
142 	    TYPE_02, 0 },			/* ldr immediate unsigned */
143 	{ "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
144 	    TYPE_02, 0 },			/* ldr register */
145 	{ "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
146 	    TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 },	/* ldr literal */
147 	{ "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
148 	    TYPE_02, OP_SIGN_EXT | OP_SF32 },	/* ldrb immediate post/pre index */
149 	{ "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
150 	    TYPE_02, OP_SF32 },			/* ldrb immediate unsigned */
151 	{ "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
152 	    TYPE_02, OP_SF32  },		/* ldrb register */
153 	{ "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
154 	    OP_SIGN_EXT | OP_SF32 },		/* ldrh immediate post/pre index */
155 	{ "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
156 	    TYPE_02, OP_SF32 },			/* ldrh immediate unsigned */
157 	{ "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
158 	    TYPE_02, OP_SF32 },			/* ldrh register */
159 	{ "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
160 	    TYPE_02, OP_SIGN_EXT | OP_SF_INV },	/* ldrsb immediate post/pre index */
161 	{ "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
162 	    TYPE_02, OP_SF_INV},		/* ldrsb immediate unsigned */
163 	{ "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
164 	    TYPE_02,  OP_SF_INV },		/* ldrsb register */
165 	{ "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
166 	    TYPE_02, OP_SIGN_EXT | OP_SF_INV },	/* ldrsh immediate post/pre index */
167 	{ "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
168 	    TYPE_02, OP_SF_INV},		/* ldrsh immediate unsigned */
169 	{ "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
170 	    TYPE_02, OP_SF_INV },		/* ldrsh register */
171 	{ "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
172 	    TYPE_02, OP_SIGN_EXT },		/* ldrsw immediate post/pre index */
173 	{ "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
174 	    TYPE_02, 0 },			/* ldrsw immediate unsigned */
175 	{ "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
176 	    TYPE_02, 0 },			/* ldrsw register */
177 	{ "ldrsw", "10011000|IMM(19)|RT(5)",
178 	    TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 },	/* ldr literal */
179 	{ NULL, NULL }
180 };
181 
182 static void
183 arm64_disasm_generate_masks(struct arm64_insn *tab)
184 {
185 	uint32_t mask, val;
186 	int a, i;
187 	int len, ret;
188 	int token = 0;
189 	char *format;
190 	int error;
191 
192 	while (tab->name != NULL) {
193 		mask = 0;
194 		val = 0;
195 		format = tab->format;
196 		token = 0;
197 		error = 0;
198 
199 		/*
200 		 * For each entry analyze format strings from the
201 		 * left (i.e. from the MSB).
202 		 */
203 		a = (INSN_SIZE * NBBY) - 1;
204 		while (*format != '\0' && (a >= 0)) {
205 			switch(*format) {
206 			case '0':
207 				/* Bit is 0, add to mask and pattern */
208 				mask |= (1 << a);
209 				a--;
210 				format++;
211 				break;
212 			case '1':
213 				/* Bit is 1, add to mask and pattern */
214 				mask |= (1 << a);
215 				val |= (1 << a);
216 				a--;
217 				format++;
218 				break;
219 			case '|':
220 				/* skip */
221 				format++;
222 				break;
223 			default:
224 				/* Token found, copy the name */
225 				memset(tab->tokens[token].name, 0,
226 				    sizeof(tab->tokens[token].name));
227 				i = 0;
228 				while (*format != '(') {
229 					tab->tokens[token].name[i] = *format;
230 					i++;
231 					format++;
232 					if (i >= ARM64_MAX_TOKEN_LEN) {
233 						printf("ERROR: token too long in op %s\n",
234 						    tab->name);
235 						error = 1;
236 						break;
237 					}
238 				}
239 				if (error != 0)
240 					break;
241 
242 				/* Read the length value */
243 				ret = sscanf(format, "(%d)", &len);
244 				if (ret == 1) {
245 					if (token >= ARM64_MAX_TOKEN_CNT) {
246 						printf("ERROR: to many tokens in op %s\n",
247 						    tab->name);
248 						error = 1;
249 						break;
250 					}
251 
252 					a -= len;
253 					tab->tokens[token].pos = a + 1;
254 					tab->tokens[token].len = len;
255 					token++;
256 				}
257 
258 				/* Skip to the end of the token */
259 				while (*format != 0 && *format != '|')
260 					format++;
261 			}
262 		}
263 
264 		/* Write mask and pattern to the instruction array */
265 		tab->mask = mask;
266 		tab->pattern = val;
267 
268 		/*
269 		 * If we got here, format string must be parsed and "a"
270 		 * should point to -1. If it's not, wrong number of bits
271 		 * in format string. Mark this as invalid and prevent
272 		 * from being matched.
273 		 */
274 		if (*format != 0 || (a != -1) || (error != 0)) {
275 			tab->mask = 0;
276 			tab->pattern = 0xffffffff;
277 			printf("ERROR: skipping instruction op %s\n",
278 			    tab->name);
279 		}
280 
281 		tab++;
282 	}
283 }
284 
285 static int
286 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
287     const char *token, int *val)
288 {
289 	int i;
290 
291 	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
292 		if (strcmp(insn->tokens[i].name, token) == 0) {
293 			*val = (opcode >> insn->tokens[i].pos &
294 			    ((1 << insn->tokens[i].len) - 1));
295 			return (0);
296 		}
297 	}
298 
299 	return (EINVAL);
300 }
301 
302 static int
303 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
304     const char *token, int *val)
305 {
306 	int i;
307 	int msk;
308 
309 	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
310 		if (strcmp(insn->tokens[i].name, token) == 0) {
311 			msk = (1 << insn->tokens[i].len) - 1;
312 			*val = ((opcode >> insn->tokens[i].pos) & msk);
313 
314 			/* If last bit is 1, sign-extend the value */
315 			if (*val & (1 << (insn->tokens[i].len - 1)))
316 				*val |= ~msk;
317 
318 			return (0);
319 		}
320 	}
321 
322 	return (EINVAL);
323 }
324 
325 static const char *
326 arm64_reg(int b64, int num)
327 {
328 
329 	if (b64 != 0)
330 		return (x_reg[num]);
331 
332 	return (w_reg[num]);
333 }
334 
335 vm_offset_t
336 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
337 {
338 	struct arm64_insn *i_ptr = arm64_i;
339 	uint32_t insn;
340 	int matchp;
341 	int ret;
342 	int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
343 	int sign_ext;
344 	int rm_absent;
345 	/* Indicate if immediate should be outside or inside brackets */
346 	int inside;
347 	/* Print exclamation mark if pre-incremented */
348 	int pre;
349 
350 	/* Initialize defaults, all are 0 except SF indicating 64bit access */
351 	shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
352 	sign_ext = 0;
353 	sf = 1;
354 
355 	matchp = 0;
356 	insn = di->di_readword(loc);
357 	while (i_ptr->name) {
358 		/* If mask is 0 then the parser was not initialized yet */
359 		if ((i_ptr->mask != 0) &&
360 		    ((insn & i_ptr->mask) ==  i_ptr->pattern)) {
361 			matchp = 1;
362 			break;
363 		}
364 		i_ptr++;
365 	}
366 	if (matchp == 0)
367 		goto undefined;
368 
369 	/* Global options */
370 	if (i_ptr->special_ops & OP_SF32)
371 		sf = 0;
372 
373 	/* Global optional tokens */
374 	arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
375 	if (i_ptr->special_ops & OP_SF_INV)
376 		sf = 1 - sf;
377 	if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
378 		sign_ext = 1 - sign_ext;
379 	if (i_ptr->special_ops & OP_SIGN_EXT)
380 		sign_ext = 1;
381 	if (sign_ext != 0)
382 		arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
383 	else
384 		arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
385 	if (i_ptr->special_ops & OP_MULT_4)
386 		imm <<= 2;
387 
388 	/* Print opcode by type */
389 	switch (i_ptr->type) {
390 	case TYPE_01:
391 		/* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
392 		   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
393 
394 		/* Mandatory tokens */
395 		ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
396 		ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
397 		if (ret != 0) {
398 			printf("ERROR: Missing mandatory token for op %s type %d\n",
399 			    i_ptr->name, i_ptr->type);
400 			goto undefined;
401 		}
402 
403 		/* Optional tokens */
404 		arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
405 		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
406 
407 		di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
408 		    arm64_reg(sf, rn));
409 
410 		/* If RM is present use it, otherwise use immediate notation */
411 		if (rm_absent == 0) {
412 			di->di_printf(", %s", arm64_reg(sf, rm));
413 			if (imm != 0)
414 				di->di_printf(", %s #%d", shift_2[shift], imm);
415 		} else {
416 			if (imm != 0 || shift != 0)
417 				di->di_printf(", #0x%x", imm);
418 			if (shift != 0)
419 				di->di_printf(" LSL #12");
420 		}
421 		break;
422 	case TYPE_02:
423 		/* OP <RT>, [<RN>, #<imm>]{!}] SF32/64
424 		   OP <RT>, [<RN>], #<imm>{!} SF32/64
425 		   OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */
426 
427 		/* Mandatory tokens */
428 		ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
429 		ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
430 		if (ret != 0) {
431 			printf("ERROR: Missing mandatory token for op %s type %d\n",
432 			    i_ptr->name, i_ptr->type);
433 			goto undefined;
434 		}
435 
436 		/* Optional tokens */
437 		arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
438 		arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
439 		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
440 
441 		if (rm_absent) {
442 			/*
443 			 * In unsigned operation, shift immediate value
444 			 * and reset options to default.
445 			 */
446 			if (sign_ext == 0) {
447 				imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
448 				    ARM_INSN_SIZE_MASK);
449 				option = 0;
450 			}
451 			switch (option) {
452 			case 0x0:
453 				pre = 0;
454 				inside = 1;
455 				break;
456 			case 0x1:
457 				pre = 0;
458 				inside = 0;
459 				break;
460 			case 0x2:
461 			default:
462 				pre = 1;
463 				inside = 1;
464 				break;
465 			}
466 
467 			di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
468 			if (inside != 0) {
469 				di->di_printf("[%s", arm64_reg(1, rn));
470 				if (imm != 0)
471 					di->di_printf(", #%d", imm);
472 				di->di_printf("]");
473 			} else {
474 				di->di_printf("[%s]", arm64_reg(1, rn));
475 				if (imm != 0)
476 					di->di_printf(", #%d", imm);
477 			}
478 			if (pre != 0)
479 				di->di_printf("!");
480 		} else {
481 			/* Last bit of option field determines 32/64 bit offset */
482 			di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
483 			    arm64_reg(sf, rt), arm64_reg(1, rn),
484 			    arm64_reg(option & 1, rm));
485 
486 			/* Calculate amount, it's op(31:30) */
487 			amount = (insn >> ARM_INSN_SIZE_OFFSET) &
488 			    ARM_INSN_SIZE_MASK;
489 
490 			switch (option) {
491 			case 0x2:
492 				di->di_printf(", uxtw #%d", amount);
493 				break;
494 			case 0x3:
495 				if (scale != 0)
496 					di->di_printf(", lsl #%d", amount);
497 				break;
498 			case 0x6:
499 				di->di_printf(", sxtw #%d", amount);
500 				break;
501 			case 0x7:
502 				di->di_printf(", sxts #%d", amount);
503 				break;
504 			default:
505 				di->di_printf(", RSVD");
506 				break;
507 			}
508 			di->di_printf("]");
509 		}
510 
511 		break;
512 
513 	case TYPE_03:
514 		/* OP <RT>, #imm SF32/64 */
515 
516 		/* Mandatory tokens */
517 		ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
518 		if (ret != 0) {
519 			printf("ERROR: Missing mandatory token for op %s type %d\n",
520 			    i_ptr->name, i_ptr->type);
521 			goto undefined;
522 		}
523 
524 		di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
525 		if (i_ptr->special_ops & OP_LITERAL)
526 			di->di_printf("0x%lx", loc + imm);
527 		else
528 			di->di_printf("#%d", imm);
529 
530 		break;
531 	default:
532 		goto undefined;
533 	}
534 
535 	di->di_printf("\n");
536 	return(loc + INSN_SIZE);
537 
538 undefined:
539 	di->di_printf("undefined\t%08x\n", insn);
540 	return(loc + INSN_SIZE);
541 }
542 
/*
 * Register arm64_disasm_generate_masks() as an initialization hook that
 * receives arm64_i, so the format strings get parsed at the very beginning.
 */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST,
    arm64_disasm_generate_masks, arm64_i);
546