xref: /freebsd/sys/arm64/arm64/disassem.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * Copyright (c) 2016 Cavium
3  * All rights reserved.
4  *
5  * This software was developed by Semihalf.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 
32 #include <machine/armreg.h>
33 #include <machine/disassem.h>
34 
35 #include <ddb/ddb.h>
36 
/*
 * Limits for the parsed format strings: a token (operand) name must fit,
 * together with its terminating NUL, in ARM64_MAX_TOKEN_LEN bytes, and one
 * instruction may carry at most ARM64_MAX_TOKEN_CNT tokens.
 */
#define	ARM64_MAX_TOKEN_LEN	8
#define	ARM64_MAX_TOKEN_CNT	10

/*
 * Location of the two-bit field in opcode bits 31:30 that disasm() uses to
 * scale unsigned load/store immediates and as the register-offset shift
 * amount.
 */
#define	ARM_INSN_SIZE_OFFSET	30
#define	ARM_INSN_SIZE_MASK	0x3

/*
 * Special options for instruction printing.  These are bit flags stored in
 * the special_ops member of struct arm64_insn.  Bits 4 and 5 are not
 * assigned here.
 */
#define	OP_SIGN_EXT	(1UL << 0)	/* Sign-extend immediate value */
#define	OP_LITERAL	(1UL << 1)	/* Use literal (memory offset) */
#define	OP_MULT_4	(1UL << 2)	/* Multiply immediate by 4 */
#define	OP_SF32		(1UL << 3)	/* Force 32-bit access */
#define	OP_SF_INV	(1UL << 6)	/* SF is inverted (1 means 32 bit access) */
#define	OP_RD_SP	(1UL << 7)	/* Use sp for RD otherwise xzr */
#define	OP_RT_SP	(1UL << 8)	/* Use sp for RT otherwise xzr */
#define	OP_RN_SP	(1UL << 9)	/* Use sp for RN otherwise xzr */
#define	OP_RM_SP	(1UL << 10)	/* Use sp for RM otherwise xzr */
#define	OP_SHIFT_ROR	(1UL << 11)	/* Use ror shift type */
54 
/*
 * Names of the 32-bit general purpose registers, indexed by register
 * number 0-30.  Register number 31 has no entry here; arm64_w_reg() maps it
 * to "wsp" or "wzr" before indexing.  The table itself is const so it can
 * never be modified at run time.
 */
static const char * const w_reg[] = {
	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
	"w24", "w25", "w26", "w27", "w28", "w29", "w30"
};
61 
/*
 * Names of the 64-bit general purpose registers, indexed by register
 * number 0-30; x30 is printed as "lr".  Register number 31 has no entry
 * here; arm64_x_reg() maps it to "sp" or "xzr" before indexing.  The table
 * itself is const so it can never be modified at run time.
 */
static const char * const x_reg[] = {
	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
	"x24", "x25", "x26", "x27", "x28", "x29", "lr"
};
68 
/*
 * Shift mnemonics indexed by the two-bit SHIFT field of shifted register
 * instructions.  disasm() only indexes entry 3 ("ror") for instructions
 * flagged with OP_SHIFT_ROR.
 */
static const char * const shift_2[] = {
	"lsl", "lsr", "asr", "ror"
};
72 
/*
 * Extend mnemonics indexed by the three-bit OPTION field of extended
 * register instructions: entries 0-3 are the unsigned extends, entries 4-7
 * the signed ones.
 */
static const char * const extend_types[] = {
	"uxtb", "uxth", "uxtw", "uxtx",
	"sxtb", "sxth", "sxtw", "sxtx",
};
77 
78 /*
79  * Structure representing single token (operand) inside instruction.
80  * name   - name of operand
81  * pos    - position within the instruction (in bits)
82  * len    - operand length (in bits)
83  */
84 struct arm64_insn_token {
85 	char name[ARM64_MAX_TOKEN_LEN];
86 	int pos;
87 	int len;
88 };
89 
/*
 * Generic operand syntaxes understood by the printer.  Every table entry
 * selects one of them; disasm() switches on this value to decide which
 * tokens to read and how to lay them out.
 */
enum arm64_format_type {
	/*
	 * Register/immediate data processing:
	 *   OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #imm} SF32/64
	 *   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
	 *   OP <RD>, <RM> {, <shift> #<imm> }
	 *   OP <RN>, <RM> {, <shift> #<imm> }
	 */
	TYPE_01 = 0,

	/*
	 * Loads and stores with a base register:
	 *   OP <RT>, [<XN|SP>, #<simm>]!
	 *   OP <RT>, [<XN|SP>], #<simm>
	 *   OP <RT>, [<XN|SP> {, #<pimm> }]
	 *   OP <RT>, [<XN|SP>, <RM> {, EXTEND AMOUNT }]
	 */
	TYPE_02,

	/*
	 * Register plus immediate or literal:
	 *   OP <RT>, #imm SF32/64
	 */
	TYPE_03,

	/*
	 * Extended register data processing:
	 *   OP <RD>, <RN|SP>, <RM> {, <extend> { #<amount> } }
	 *   OP <RN|SP>, <RM>, {, <extend> { #<amount> } }
	 */
	TYPE_04,
};
119 
120 /*
121  * Structure representing single parsed instruction format.
122  * name   - opcode name
123  * format - opcode format in a human-readable way
124  * type   - syntax type for printing
125  * special_ops  - special options passed to a printer (if any)
126  * mask   - bitmask for instruction matching
127  * pattern      - pattern to look for
128  * tokens - array of tokens (operands) inside instruction
129  */
130 struct arm64_insn {
131 	char *name;
132 	char *format;
133 	enum arm64_format_type type;
134 	uint64_t special_ops;
135 	uint32_t mask;
136 	uint32_t pattern;
137 	struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
138 };
139 
140 /*
141  * Specify instruction opcode format in a human-readable way. Use notation
142  * obtained from ARM Architecture Reference Manual for ARMv8-A.
143  *
144  * Format string description:
145  *  Each group must be separated by "|". Group made of 0/1 is used to
146  *  generate mask and pattern for instruction matching. Groups containing
147  *  an operand token (in format NAME(length_bits)) are used to retrieve any
148  *  operand data from the instruction. Names here must be meaningful
149  *  and match the one described in the Manual.
150  *
151  * Token description:
152  * SF     - "0" represents 32-bit access, "1" represents 64-bit access
153  * SHIFT  - type of shift (instruction dependent)
154  * IMM    - immediate value
155  * Rx     - register number
156  * OPTION - command specific options
157  * SCALE  - scaling of immediate value
158  */
159 static struct arm64_insn arm64_i[] = {
160 	{ "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
161 	    TYPE_01, 0 },			/* add shifted register */
162 	{ "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
163 	    TYPE_01, OP_RD_SP | OP_RN_SP },	/* mov (to/from sp) */
164 	{ "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
165 	    TYPE_01, OP_RD_SP | OP_RN_SP },	/* add immediate */
166 	{ "cmn", "SF(1)|0101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111",
167 	    TYPE_01, 0 },			/* cmn shifted register */
168 	{ "adds", "SF(1)|0101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
169 	    TYPE_01, 0 },			/* adds shifted register */
170 	{ "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
171 	    TYPE_02, OP_SIGN_EXT },
172 	    /* ldr immediate post/pre index */
173 	{ "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
174 	    TYPE_02, 0 },			/* ldr immediate unsigned */
175 	{ "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
176 	    TYPE_02, 0 },			/* ldr register */
177 	{ "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
178 	    TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 },	/* ldr literal */
179 	{ "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
180 	    TYPE_02, OP_SIGN_EXT | OP_SF32 },
181 	    /* ldrb immediate post/pre index */
182 	{ "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
183 	    TYPE_02, OP_SF32 },			/* ldrb immediate unsigned */
184 	{ "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
185 	    TYPE_02, OP_SF32 },			/* ldrb register */
186 	{ "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
187 	    OP_SIGN_EXT | OP_SF32 },
188 	    /* ldrh immediate post/pre index */
189 	{ "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
190 	    TYPE_02, OP_SF32 },			/* ldrh immediate unsigned */
191 	{ "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
192 	    TYPE_02, OP_SF32 },			/* ldrh register */
193 	{ "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
194 	    TYPE_02, OP_SIGN_EXT | OP_SF_INV },
195 	    /* ldrsb immediate post/pre index */
196 	{ "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
197 	    TYPE_02, OP_SF_INV },		/* ldrsb immediate unsigned */
198 	{ "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
199 	    TYPE_02,  OP_SF_INV },		/* ldrsb register */
200 	{ "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
201 	    TYPE_02, OP_SIGN_EXT | OP_SF_INV },
202 	    /* ldrsh immediate post/pre index */
203 	{ "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
204 	    TYPE_02, OP_SF_INV },		/* ldrsh immediate unsigned */
205 	{ "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
206 	    TYPE_02, OP_SF_INV },		/* ldrsh register */
207 	{ "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
208 	    TYPE_02, OP_SIGN_EXT },
209 	    /* ldrsw immediate post/pre index */
210 	{ "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
211 	    TYPE_02, 0 },			/* ldrsw immediate unsigned */
212 	{ "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
213 	    TYPE_02, 0 },			/* ldrsw register */
214 	{ "ldrsw", "10011000|IMM(19)|RT(5)",
215 	    TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 },	/* ldrsw literal */
216 	{ "str", "1|SF(1)|111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
217 	    TYPE_02, OP_SIGN_EXT },
218 	    /* str immediate post/pre index */
219 	{ "str", "1|SF(1)|11100100|IMM(12)|RN(5)|RT(5)",
220 	    TYPE_02, 0 },			/* str immediate unsigned */
221 	{ "str", "1|SF(1)|111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
222 	    TYPE_02, 0 },			/* str register */
223 	{ "strb", "00111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
224 	    TYPE_02, OP_SIGN_EXT | OP_SF32 },
225 	    /* strb immediate post/pre index */
226 	{ "strb", "0011100100|IMM(12)|RN(5)|RT(5)",
227 	    TYPE_02, OP_SF32 },			/* strb immediate unsigned */
228 	{ "strb", "00111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
229 	    TYPE_02, OP_SF32 },			/* strb register */
230 	{ "strh", "01111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
231 	    TYPE_02, OP_SF32 | OP_SIGN_EXT },
232 	    /* strh immediate post/pre index */
233 	{ "strh", "0111100100|IMM(12)|RN(5)|RT(5)",
234 	    TYPE_02, OP_SF32 },
235 	    /* strh immediate unsigned */
236 	{ "strh", "01111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
237 	    TYPE_02, OP_SF32 },
238 	    /* strh register */
239 	{ "neg", "SF(1)|1001011|SHIFT(2)|0|RM(5)|IMM(6)|11111|RD(5)",
240 	    TYPE_01, 0 },			/* neg shifted register */
241 	{ "sub", "SF(1)|1001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
242 	    TYPE_01, 0 },			/* sub shifted register */
243 	{ "cmp", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111",
244 	    TYPE_01, 0 },			/* cmp shifted register */
245 	{ "negs", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|11111|RD(5)",
246 	    TYPE_01, 0 },			/* negs shifted register */
247 	{ "subs", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
248 	    TYPE_01, 0 },			/* subs shifted register */
249 	{ "mvn", "SF(1)|0101010|SHIFT(2)|1|RM(5)|IMM(6)|11111|RD(5)",
250 	    TYPE_01, OP_SHIFT_ROR },		/* mvn shifted register */
251 	{ "orn", "SF(1)|0101010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)",
252 	    TYPE_01, OP_SHIFT_ROR },		/* orn shifted register */
253 	{ "mov", "SF(1)|0101010000|RM(5)|000000|11111|RD(5)",
254 	    TYPE_01, 0 },			/* mov register */
255 	{ "orr", "SF(1)|0101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
256 	    TYPE_01, OP_SHIFT_ROR },		/* orr shifted register */
257 	{ "and", "SF(1)|0001010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
258 	    TYPE_01, OP_SHIFT_ROR },		/* and shifted register */
259 	{ "tst", "SF(1)|1101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111",
260 	    TYPE_01, OP_SHIFT_ROR },		/* tst shifted register */
261 	{ "ands", "SF(1)|1101010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
262 	    TYPE_01, OP_SHIFT_ROR },		/* ands shifted register */
263 	{ "bic", "SF(1)|0001010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)",
264 	    TYPE_01, OP_SHIFT_ROR },		/* bic shifted register */
265 	{ "bics", "SF(1)|1101010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)",
266 	    TYPE_01, OP_SHIFT_ROR },		/* bics shifted register */
267 	{ "eon", "SF(1)|1001010|SHIFT(2)|1|RM(5)|IMM(6)|RN(5)|RD(5)",
268 	    TYPE_01, OP_SHIFT_ROR },		/* eon shifted register */
269 	{ "eor", "SF(1)|1001010|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
270 	    TYPE_01, OP_SHIFT_ROR },		/* eor shifted register */
271 	{ "add", "SF(1)|0001011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|RD(5)",
272 	    TYPE_04, OP_RD_SP },		/* add extended register */
273 	{ "cmn", "SF(1)|0101011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|11111",
274 	    TYPE_04, 0 },			/* cmn extended register */
275 	{ "adds", "SF(1)|0101011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|RD(5)",
276 	    TYPE_04, 0 },			/* adds extended register */
277 	{ "sub", "SF(1)|1001011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|RD(5)",
278 	    TYPE_04, OP_RD_SP },		/* sub extended register */
279 	{ "cmp", "SF(1)|1101011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|11111",
280 	    TYPE_04, 0 },			/* cmp extended register */
281 	{ "subs", "SF(1)|1101011001|RM(5)|OPTION(3)|IMM(3)|RN(5)|RD(5)",
282 	    TYPE_04, 0 },			/* subs extended register */
283 	{ NULL, NULL }
284 };
285 
286 static void
287 arm64_disasm_generate_masks(struct arm64_insn *tab)
288 {
289 	uint32_t mask, val;
290 	int a, i;
291 	int len, ret;
292 	int token = 0;
293 	char *format;
294 	int error;
295 
296 	while (tab->name != NULL) {
297 		mask = 0;
298 		val = 0;
299 		format = tab->format;
300 		token = 0;
301 		error = 0;
302 
303 		/*
304 		 * For each entry analyze format strings from the
305 		 * left (i.e. from the MSB).
306 		 */
307 		a = (INSN_SIZE * NBBY) - 1;
308 		while (*format != '\0' && (a >= 0)) {
309 			switch (*format) {
310 			case '0':
311 				/* Bit is 0, add to mask and pattern */
312 				mask |= (1 << a);
313 				a--;
314 				format++;
315 				break;
316 			case '1':
317 				/* Bit is 1, add to mask and pattern */
318 				mask |= (1 << a);
319 				val |= (1 << a);
320 				a--;
321 				format++;
322 				break;
323 			case '|':
324 				/* skip */
325 				format++;
326 				break;
327 			default:
328 				/* Token found, copy the name */
329 				memset(tab->tokens[token].name, 0,
330 				    sizeof(tab->tokens[token].name));
331 				i = 0;
332 				while (*format != '(') {
333 					tab->tokens[token].name[i] = *format;
334 					i++;
335 					format++;
336 					if (i >= ARM64_MAX_TOKEN_LEN) {
337 						printf("ERROR: "
338 						    "token too long in op %s\n",
339 						    tab->name);
340 						error = 1;
341 						break;
342 					}
343 				}
344 				if (error != 0)
345 					break;
346 
347 				/* Read the length value */
348 				ret = sscanf(format, "(%d)", &len);
349 				if (ret == 1) {
350 					if (token >= ARM64_MAX_TOKEN_CNT) {
351 						printf("ERROR: "
352 						    "too many tokens in op %s\n",
353 						    tab->name);
354 						error = 1;
355 						break;
356 					}
357 
358 					a -= len;
359 					tab->tokens[token].pos = a + 1;
360 					tab->tokens[token].len = len;
361 					token++;
362 				}
363 
364 				/* Skip to the end of the token */
365 				while (*format != 0 && *format != '|')
366 					format++;
367 			}
368 		}
369 
370 		/* Write mask and pattern to the instruction array */
371 		tab->mask = mask;
372 		tab->pattern = val;
373 
374 		/*
375 		 * If we got here, format string must be parsed and "a"
376 		 * should point to -1. If it's not, wrong number of bits
377 		 * in format string. Mark this as invalid and prevent
378 		 * from being matched.
379 		 */
380 		if (*format != 0 || (a != -1) || (error != 0)) {
381 			tab->mask = 0;
382 			tab->pattern = 0xffffffff;
383 			printf("ERROR: skipping instruction op %s\n",
384 			    tab->name);
385 		}
386 
387 		tab++;
388 	}
389 }
390 
391 static int
392 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
393     const char *token, int *val)
394 {
395 	int i;
396 
397 	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
398 		if (strcmp(insn->tokens[i].name, token) == 0) {
399 			*val = (opcode >> insn->tokens[i].pos &
400 			    ((1 << insn->tokens[i].len) - 1));
401 			return (0);
402 		}
403 	}
404 
405 	return (EINVAL);
406 }
407 
408 static int
409 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
410     const char *token, int *val)
411 {
412 	int i;
413 	int msk;
414 
415 	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
416 		if (strcmp(insn->tokens[i].name, token) == 0) {
417 			msk = (1 << insn->tokens[i].len) - 1;
418 			*val = ((opcode >> insn->tokens[i].pos) & msk);
419 
420 			/* If last bit is 1, sign-extend the value */
421 			if (*val & (1 << (insn->tokens[i].len - 1)))
422 				*val |= ~msk;
423 
424 			return (0);
425 		}
426 	}
427 
428 	return (EINVAL);
429 }
430 
431 static const char *
432 arm64_disasm_reg_extend(int sf, int option, int rd, int rn, int amount)
433 {
434 	bool is_sp, lsl_preferred_uxtw, lsl_preferred_uxtx, lsl_preferred;
435 
436 	is_sp = rd == 31 || rn == 31;
437 	lsl_preferred_uxtw = sf == 0 && option == 2;
438 	lsl_preferred_uxtx = sf == 1 && option == 3;
439 	lsl_preferred = is_sp && (lsl_preferred_uxtw || lsl_preferred_uxtx);
440 
441 	/*
442 	 * LSL may be omitted when <amount> is 0.
443 	 * In all other cases <extend> is required.
444 	 */
445 	if (lsl_preferred && amount == 0)
446 		return (NULL);
447 	if (lsl_preferred)
448 		return ("lsl");
449 	return (extend_types[option]);
450 }
451 
452 static const char *
453 arm64_w_reg(int num, int wsp)
454 {
455 	if (num == 31)
456 		return (wsp != 0 ? "wsp" : "wzr");
457 	return (w_reg[num]);
458 }
459 
460 static const char *
461 arm64_x_reg(int num, int sp)
462 {
463 	if (num == 31)
464 		return (sp != 0 ? "sp" : "xzr");
465 	return (x_reg[num]);
466 }
467 
/*
 * Return the name of register "num", using the 64-bit names when "b64" is
 * non-zero and the 32-bit names otherwise.  "sp" selects how register 31 is
 * printed (stack pointer if non-zero, zero register if 0).
 */
static const char *
arm64_reg(int b64, int num, int sp)
{
	return (b64 != 0 ? arm64_x_reg(num, sp) : arm64_w_reg(num, sp));
}
475 
/*
 * Decode OPTION(3) of an extended register instruction to choose between
 * the <Xn> and <Wn> name of register "reg": options 3 and 7 select the
 * 64-bit name, all others the 32-bit one.  Register 31 is printed as the
 * zero register (xzr/wzr).
 */
static const char *
arm64_disasm_reg_width(int option, int reg)
{
	switch (option) {
	case 3:
	case 7:
		return (arm64_x_reg(reg, 0));
	default:
		return (arm64_w_reg(reg, 0));
	}
}
487 
488 vm_offset_t
489 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
490 {
491 	struct arm64_insn *i_ptr = arm64_i;
492 	uint32_t insn;
493 	int matchp;
494 	int ret;
495 	int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
496 	int sign_ext;
497 	bool rm_absent, rd_absent, rn_absent;
498 	/* Indicate if immediate should be outside or inside brackets */
499 	int inside;
500 	/* Print exclamation mark if pre-incremented */
501 	int pre;
502 	/* Indicate if x31 register should be printed as sp or xzr */
503 	int rm_sp, rt_sp, rd_sp, rn_sp;
504 	/* Indicate if shift type ror is supported */
505 	bool has_shift_ror;
506 
507 	const char *extend;
508 
509 	/* Initialize defaults, all are 0 except SF indicating 64bit access */
510 	shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
511 	sign_ext = 0;
512 	sf = 1;
513 	extend = NULL;
514 
515 	matchp = 0;
516 	insn = di->di_readword(loc);
517 	while (i_ptr->name) {
518 		/* If mask is 0 then the parser was not initialized yet */
519 		if ((i_ptr->mask != 0) &&
520 		    ((insn & i_ptr->mask) == i_ptr->pattern)) {
521 			matchp = 1;
522 			break;
523 		}
524 		i_ptr++;
525 	}
526 	if (matchp == 0)
527 		goto undefined;
528 
529 	/* Global options */
530 	if (i_ptr->special_ops & OP_SF32)
531 		sf = 0;
532 
533 	/* Global optional tokens */
534 	arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
535 	if (i_ptr->special_ops & OP_SF_INV)
536 		sf = 1 - sf;
537 	if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
538 		sign_ext = 1 - sign_ext;
539 	if (i_ptr->special_ops & OP_SIGN_EXT)
540 		sign_ext = 1;
541 	if (sign_ext != 0)
542 		arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
543 	else
544 		arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
545 	if (i_ptr->special_ops & OP_MULT_4)
546 		imm <<= 2;
547 
548 	rm_sp = i_ptr->special_ops & OP_RM_SP;
549 	rt_sp = i_ptr->special_ops & OP_RT_SP;
550 	rd_sp = i_ptr->special_ops & OP_RD_SP;
551 	rn_sp = i_ptr->special_ops & OP_RN_SP;
552 
553 	has_shift_ror = i_ptr->special_ops & OP_SHIFT_ROR;
554 
555 	/* Print opcode by type */
556 	switch (i_ptr->type) {
557 	case TYPE_01:
558 		/*
559 		 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
560 		 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
561 		 * OP <RD>, <RM> {, <shift> #<imm> }
562 		 * OP <RN>, <RM> {, <shift> #<imm> }
563 		 */
564 
565 		rd_absent = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
566 		rn_absent = arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
567 		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
568 		arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
569 
570 		/*
571 		 * if shift type is RESERVED for shifted register instruction,
572 		 * print undefined
573 		 */
574 		if (shift == 3 && !has_shift_ror)
575 			goto undefined;
576 
577 		di->di_printf("%s\t", i_ptr->name);
578 
579 		/*
580 		 * If RD and RN are present, we will display the following
581 		 * patterns:
582 		 * - OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
583 		 * - OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
584 		 * Otherwise if only RD is present:
585 		 * - OP <RD>, <RM> {, <shift> #<imm> }
586 		 * Otherwise if only RN is present:
587 		 * - OP <RN>, <RM> {, <shift> #<imm> }
588 		 */
589 		if (!rd_absent && !rn_absent)
590 			di->di_printf("%s, %s", arm64_reg(sf, rd, rd_sp),
591 			    arm64_reg(sf, rn, rn_sp));
592 		else if (!rd_absent)
593 			di->di_printf("%s", arm64_reg(sf, rd, rd_sp));
594 		else
595 			di->di_printf("%s", arm64_reg(sf, rn, rn_sp));
596 
597 		/* If RM is present use it, otherwise use immediate notation */
598 		if (!rm_absent) {
599 			di->di_printf(", %s", arm64_reg(sf, rm, rm_sp));
600 			if (imm != 0)
601 				di->di_printf(", %s #%d", shift_2[shift], imm);
602 		} else {
603 			if (imm != 0 || shift != 0)
604 				di->di_printf(", #0x%x", imm);
605 			if (shift != 0)
606 				di->di_printf(" lsl #12");
607 		}
608 		break;
609 	case TYPE_02:
610 		/*
611 		 * OP <RT>, [<XN|SP>, #<simm>]!
612 		 * OP <RT>, [<XN|SP>], #<simm>
613 		 * OP <RT>, [<XN|SP> {, #<pimm> }]
614 		 * OP <RT>, [<XN|SP>, <RM> {, EXTEND AMOUNT }]
615 		 */
616 
617 		/* Mandatory tokens */
618 		ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
619 		ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
620 		if (ret != 0) {
621 			printf("ERROR: "
622 			    "Missing mandatory token for op %s type %d\n",
623 			    i_ptr->name, i_ptr->type);
624 			goto undefined;
625 		}
626 
627 		/* Optional tokens */
628 		arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
629 		arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
630 		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
631 
632 		if (rm_absent) {
633 			/*
634 			 * In unsigned operation, shift immediate value
635 			 * and reset options to default.
636 			 */
637 			if (sign_ext == 0) {
638 				imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
639 				    ARM_INSN_SIZE_MASK);
640 				option = 0;
641 			}
642 			switch (option) {
643 			case 0x0:
644 				pre = 0;
645 				inside = 1;
646 				break;
647 			case 0x1:
648 				pre = 0;
649 				inside = 0;
650 				break;
651 			case 0x2:
652 			default:
653 				pre = 1;
654 				inside = 1;
655 				break;
656 			}
657 
658 			di->di_printf("%s\t%s, ", i_ptr->name,
659 			    arm64_reg(sf, rt, rt_sp));
660 			if (inside != 0) {
661 				di->di_printf("[%s", arm64_x_reg(rn, 1));
662 				if (imm != 0)
663 					di->di_printf(", #%d", imm);
664 				di->di_printf("]");
665 			} else {
666 				di->di_printf("[%s]", arm64_x_reg(rn, 1));
667 				if (imm != 0)
668 					di->di_printf(", #%d", imm);
669 			}
670 			if (pre != 0)
671 				di->di_printf("!");
672 		} else {
673 			/* Last bit of option field determines 32/64 bit offset */
674 			di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
675 			    arm64_reg(sf, rt, rt_sp), arm64_x_reg(rn, 1),
676 			    arm64_reg(option & 1, rm, rm_sp));
677 
678 			if (scale == 0)
679 				amount = 0;
680 			else {
681 				/* Calculate amount, it's op(31:30) */
682 				amount = (insn >> ARM_INSN_SIZE_OFFSET) &
683 			            ARM_INSN_SIZE_MASK;
684 			}
685 
686 			switch (option) {
687 			case 0x2:
688 				di->di_printf(", uxtw #%d", amount);
689 				break;
690 			case 0x3:
691 				if (scale != 0)
692 					di->di_printf(", lsl #%d", amount);
693 				break;
694 			case 0x6:
695 				di->di_printf(", sxtw #%d", amount);
696 				break;
697 			case 0x7:
698 				di->di_printf(", sxtx #%d", amount);
699 				break;
700 			default:
701 				di->di_printf(", rsv");
702 				break;
703 			}
704 			di->di_printf("]");
705 		}
706 
707 		break;
708 
709 	case TYPE_03:
710 		/* OP <RT>, #imm SF32/64 */
711 
712 		/* Mandatory tokens */
713 		ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
714 		if (ret != 0) {
715 			printf("ERROR: "
716 			    "Missing mandatory token for op %s type %d\n",
717 			    i_ptr->name, i_ptr->type);
718 			goto undefined;
719 		}
720 
721 		di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt, rt_sp));
722 		if (i_ptr->special_ops & OP_LITERAL)
723 			di->di_printf("0x%lx", loc + imm);
724 		else
725 			di->di_printf("#%d", imm);
726 
727 		break;
728 
729 	case TYPE_04:
730 		/*
731 		 * OP <RD>, <RN|SP>, <RM> {, <extend> { #<amount> } }
732 		 * OP <RN|SP>, <RM>, {, <extend> { #<amount> } }
733 		 */
734 
735 		arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
736 		arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
737 		arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
738 
739 		rd_absent = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
740 		extend = arm64_disasm_reg_extend(sf, option, rd, rn, imm);
741 
742 		di->di_printf("%s\t", i_ptr->name);
743 
744 		if (!rd_absent)
745 			di->di_printf("%s, ", arm64_reg(sf, rd, rd_sp));
746 
747 		di->di_printf("%s, ", arm64_reg(sf, rn, 1));
748 
749 		if (sf != 0)
750 			di->di_printf("%s",
751 			    arm64_disasm_reg_width(option, rm));
752 		else
753 			di->di_printf("%s", arm64_w_reg(rm, 0));
754 
755 		if (extend != NULL)
756 			di->di_printf(", %s #%d", extend, imm);
757 
758 		break;
759 	default:
760 		goto undefined;
761 	}
762 
763 	di->di_printf("\n");
764 	return (loc + INSN_SIZE);
765 
766 undefined:
767 	di->di_printf("undefined\t%08x\n", insn);
768 	return (loc + INSN_SIZE);
769 }
770 
/*
 * Parse format strings at the very beginning: register
 * arm64_disasm_generate_masks() to be called with arm64_i as its argument
 * at SI_SUB_DDB_SERVICES / SI_ORDER_FIRST.  Until that call has filled in
 * the masks, every table entry has mask 0 and disasm() skips it, so all
 * opcodes are reported as undefined.
 */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST,
    arm64_disasm_generate_masks, arm64_i);
774