xref: /freebsd/sys/arm64/arm64/disassem.c (revision 2d004dd5bc51eaef924f55d1e2407e80a9b4bcb5)
1 /*-
2  * Copyright (c) 2016 Cavium
3  * All rights reserved.
4  *
5  * This software was developed by Semihalf.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include <sys/param.h>
32 
33 #include <sys/systm.h>
34 #include <machine/disassem.h>
35 #include <machine/armreg.h>
36 #include <ddb/ddb.h>
37 
/*
 * Size of the buffer holding a token name, including the terminating NUL.
 */
#define	ARM64_MAX_TOKEN_LEN	8

/*
 * Number of token (operand) slots available in one instruction format.
 */
#define	ARM64_MAX_TOKEN_CNT	10
40 
/*
 * Names printed for register numbers 0-31 when the instruction uses
 * 32-bit access (SF == 0). The table is read-only: both the strings and
 * the pointers to them are const.
 */
static const char * const w_reg[] = {
	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
};
47 
/*
 * Names printed for register numbers 0-31 when the instruction uses
 * 64-bit access (SF == 1). The table is read-only: both the strings and
 * the pointers to them are const.
 */
static const char * const x_reg[] = {
	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
	"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
};
54 
/*
 * Names printed for the four values of a 2-bit SHIFT field. The table is
 * read-only: both the strings and the pointers to them are const.
 */
static const char * const shift_2[] = {
	"LSL", "LSR", "ASR", "RSV"
};
58 
59 /*
60  * Structure representing single token (operand) inside instruction.
61  * name   - name of operand
62  * pos    - position within the instruction (in bits)
63  * len    - operand length (in bits)
64  */
65 struct arm64_insn_token {
66 	char name[ARM64_MAX_TOKEN_LEN];
67 	int pos;
68 	int len;
69 };
70 
/*
 * Generic syntax classes used when printing an instruction.
 *
 * TYPE_01 covers, for both 32-bit and 64-bit (SF) variants:
 *   OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>}
 *   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>}
 */
enum arm64_format_type {
	TYPE_01,
};
78 
79 /*
80  * Structure representing single parsed instruction format.
81  * name   - opcode name
82  * format - opcode format in a human-readable way
83  * type   - syntax type for printing
84  * special_ops  - special options passed to a printer (if any)
85  * mask   - bitmask for instruction matching
86  * pattern      - pattern to look for
87  * tokens - array of tokens (operands) inside instruction
88  */
89 struct arm64_insn {
90 	char* name;
91 	char* format;
92 	enum arm64_format_type type;
93 	uint64_t special_ops;
94 	uint32_t mask;
95 	uint32_t pattern;
96 	struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
97 };
98 
99 /*
100  * Specify instruction opcode format in a human-readable way. Use notation
101  * obtained from ARM Architecture Reference Manual for ARMv8-A.
102  *
103  * Format string description:
104  *  Each group must be separated by "|". Group made of 0/1 is used to
105  *  generate mask and pattern for instruction matching. Groups containing
106  *  an operand token (in format NAME(length_bits)) are used to retrieve any
107  *  operand data from the instruction. Names here must be meaningful
108  *  and match the one described in the Manual.
109  *
110  * Token description:
111  * SF     - "0" represents 32-bit access, "1" represents 64-bit access
112  * SHIFT  - type of shift (instruction dependent)
113  * IMM    - immediate value
114  * Rx     - register number
115  */
116 static struct arm64_insn arm64_i[] = {
117     { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 },
118     { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", TYPE_01, 0 },
119     { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", TYPE_01, 0 },
120     { NULL, NULL }
121 };
122 
123 static void
124 arm64_disasm_generate_masks(struct arm64_insn *tab)
125 {
126 	uint32_t mask, val;
127 	int a, i;
128 	int len, ret;
129 	int token = 0;
130 	char *format;
131 	int error;
132 
133 	while (tab->name != NULL) {
134 		mask = 0;
135 		val = 0;
136 		format = tab->format;
137 		token = 0;
138 		error = 0;
139 
140 		/*
141 		 * For each entry analyze format strings from the
142 		 * left (i.e. from the MSB).
143 		 */
144 		a = (INSN_SIZE * NBBY) - 1;
145 		while (*format != '\0' && (a >= 0)) {
146 			switch(*format) {
147 			case '0':
148 				/* Bit is 0, add to mask and pattern */
149 				mask |= (1 << a);
150 				a--;
151 				format++;
152 				break;
153 			case '1':
154 				/* Bit is 1, add to mask and pattern */
155 				mask |= (1 << a);
156 				val |= (1 << a);
157 				a--;
158 				format++;
159 				break;
160 			case '|':
161 				/* skip */
162 				format++;
163 				break;
164 			default:
165 				/* Token found, copy the name */
166 				memset(tab->tokens[token].name, 0,
167 				    sizeof(tab->tokens[token].name));
168 				i = 0;
169 				while (*format != '(') {
170 					tab->tokens[token].name[i] = *format;
171 					i++;
172 					format++;
173 					if (i >= ARM64_MAX_TOKEN_LEN) {
174 						printf("ERROR: token too long in op %s\n",
175 						    tab->name);
176 						error = 1;
177 						break;
178 					}
179 				}
180 				if (error != 0)
181 					break;
182 
183 				/* Read the length value */
184 				ret = sscanf(format, "(%d)", &len);
185 				if (ret == 1) {
186 					if (token >= ARM64_MAX_TOKEN_CNT) {
187 						printf("ERROR: to many tokens in op %s\n",
188 						    tab->name);
189 						error = 1;
190 						break;
191 					}
192 
193 					a -= len;
194 					tab->tokens[token].pos = a + 1;
195 					tab->tokens[token].len = len;
196 					token++;
197 				}
198 
199 				/* Skip to the end of the token */
200 				while (*format != 0 && *format != '|')
201 					format++;
202 			}
203 		}
204 
205 		/* Write mask and pattern to the instruction array */
206 		tab->mask = mask;
207 		tab->pattern = val;
208 
209 		/*
210 		 * If we got here, format string must be parsed and "a"
211 		 * should point to -1. If it's not, wrong number of bits
212 		 * in format string. Mark this as invalid and prevent
213 		 * from being matched.
214 		 */
215 		if (*format != 0 || (a != -1) || (error != 0)) {
216 			tab->mask = 0;
217 			tab->pattern = 0xffffffff;
218 			printf("ERROR: skipping instruction op %s\n",
219 			    tab->name);
220 		}
221 
222 		tab++;
223 	}
224 }
225 
226 static int
227 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
228     const char *token, int *val)
229 {
230 	int i;
231 
232 	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
233 		if (strcmp(insn->tokens[i].name, token) == 0) {
234 			*val = (opcode >> insn->tokens[i].pos &
235 			    ((1 << insn->tokens[i].len) - 1));
236 			return (0);
237 		}
238 	}
239 
240 	return (EINVAL);
241 }
242 
243 static const char *
244 arm64_reg(int b64, int num)
245 {
246 
247 	if (b64 != 0)
248 		return (x_reg[num]);
249 
250 	return (w_reg[num]);
251 }
252 
253 vm_offset_t
254 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
255 {
256 	struct arm64_insn *i_ptr = arm64_i;
257 	uint32_t insn;
258 	int matchp;
259 	int ret;
260 	int shift, rm, rd, rn, imm, sf;
261 	int rm_absent;
262 
263 	/* Initialize defaults, all are 0 except SF indicating 64bit access */
264 	shift = rd = rm = rn = imm = 0;
265 	sf = 1;
266 
267 	matchp = 0;
268 	insn = di->di_readword(loc);
269 	while (i_ptr->name) {
270 		/* If mask is 0 then the parser was not initialized yet */
271 		if ((i_ptr->mask != 0) &&
272 		    ((insn & i_ptr->mask) ==  i_ptr->pattern)) {
273 			matchp = 1;
274 			break;
275 		}
276 		i_ptr++;
277 	}
278 	if (matchp == 0)
279 		goto undefined;
280 
281 	switch (i_ptr->type) {
282 	case TYPE_01:
283 		/* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
284 		   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
285 
286 		/* Mandatory tokens */
287 		ret = arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
288 		ret |= arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
289 		ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
290 		if (ret != 0) {
291 			printf("ERROR: Missing mandatory token for op %s type %d\n",
292 			    i_ptr->name, i_ptr->type);
293 			goto undefined;
294 		}
295 
296 		/* Optional tokens */
297 		arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
298 		arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
299 		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
300 
301 		di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
302 		    arm64_reg(sf, rn));
303 
304 		/* If RM is present use it, otherwise use immediate notation */
305 		if (rm_absent == 0) {
306 			di->di_printf(", %s", arm64_reg(sf, rm));
307 			if (imm != 0)
308 				di->di_printf(", %s #%d", shift_2[shift], imm);
309 		} else {
310 			if (imm != 0 || shift != 0)
311 				di->di_printf(", #0x%x", imm);
312 			if (shift != 0)
313 				di->di_printf(" LSL #12");
314 		}
315 		break;
316 	default:
317 		goto undefined;
318 	}
319 
320 	di->di_printf("\n");
321 	return(loc + INSN_SIZE);
322 
323 undefined:
324 	di->di_printf("undefined\t%08x\n", insn);
325 	return(loc + INSN_SIZE);
326 }
327 
/*
 * Register arm64_disasm_generate_masks() with SYSINIT (subsystem
 * SI_SUB_DDB_SERVICES, order SI_ORDER_FIRST), passing arm64_i as its
 * argument, so that the format strings get parsed at the very beginning.
 */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST,
    arm64_disasm_generate_masks, arm64_i);
331