1 /*- 2 * Copyright (c) 2016 Cavium 3 * All rights reserved. 4 * 5 * This software was developed by Semihalf. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 #include <sys/param.h> 32 33 #include <sys/systm.h> 34 #include <machine/disassem.h> 35 #include <machine/armreg.h> 36 #include <ddb/ddb.h> 37 38 #define ARM64_MAX_TOKEN_LEN 8 39 #define ARM64_MAX_TOKEN_CNT 10 40 41 static const char *w_reg[] = { 42 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 43 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 44 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 45 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP", 46 }; 47 48 static const char *x_reg[] = { 49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 52 "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP", 53 }; 54 55 static const char *shift_2[] = { 56 "LSL", "LSR", "ASR", "RSV" 57 }; 58 59 /* 60 * Structure representing single token (operand) inside instruction. 61 * name - name of operand 62 * pos - position within the instruction (in bits) 63 * len - operand length (in bits) 64 */ 65 struct arm64_insn_token { 66 char name[ARM64_MAX_TOKEN_LEN]; 67 int pos; 68 int len; 69 }; 70 71 /* 72 * Define generic types for instruction printing. 73 */ 74 enum arm64_format_type { 75 TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 76 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ 77 }; 78 79 /* 80 * Structure representing single parsed instruction format. 81 * name - opcode name 82 * format - opcode format in a human-readable way 83 * type - syntax type for printing 84 * special_ops - special options passed to a printer (if any) 85 * mask - bitmask for instruction matching 86 * pattern - pattern to look for 87 * tokens - array of tokens (operands) inside instruction 88 */ 89 struct arm64_insn { 90 char* name; 91 char* format; 92 enum arm64_format_type type; 93 uint64_t special_ops; 94 uint32_t mask; 95 uint32_t pattern; 96 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT]; 97 }; 98 99 /* 100 * Specify instruction opcode format in a human-readable way. Use notation 101 * obtained from ARM Architecture Reference Manual for ARMv8-A. 102 * 103 * Format string description: 104 * Each group must be separated by "|". Group made of 0/1 is used to 105 * generate mask and pattern for instruction matching. Groups containing 106 * an operand token (in format NAME(length_bits)) are used to retrieve any 107 * operand data from the instruction. Names here must be meaningful 108 * and match the one described in the Manual. 109 * 110 * Token description: 111 * SF - "0" represents 32-bit access, "1" represents 64-bit access 112 * SHIFT - type of shift (instruction dependent) 113 * IMM - immediate value 114 * Rx - register number 115 */ 116 static struct arm64_insn arm64_i[] = { 117 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 }, 118 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", TYPE_01, 0 }, 119 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", TYPE_01, 0 }, 120 { NULL, NULL } 121 }; 122 123 static void 124 arm64_disasm_generate_masks(struct arm64_insn *tab) 125 { 126 uint32_t mask, val; 127 int a, i; 128 int len, ret; 129 int token = 0; 130 char *format; 131 int error; 132 133 while (tab->name != NULL) { 134 mask = 0; 135 val = 0; 136 format = tab->format; 137 token = 0; 138 error = 0; 139 140 /* 141 * For each entry analyze format strings from the 142 * left (i.e. from the MSB). 143 */ 144 a = (INSN_SIZE * NBBY) - 1; 145 while (*format != '\0' && (a >= 0)) { 146 switch(*format) { 147 case '0': 148 /* Bit is 0, add to mask and pattern */ 149 mask |= (1 << a); 150 a--; 151 format++; 152 break; 153 case '1': 154 /* Bit is 1, add to mask and pattern */ 155 mask |= (1 << a); 156 val |= (1 << a); 157 a--; 158 format++; 159 break; 160 case '|': 161 /* skip */ 162 format++; 163 break; 164 default: 165 /* Token found, copy the name */ 166 memset(tab->tokens[token].name, 0, 167 sizeof(tab->tokens[token].name)); 168 i = 0; 169 while (*format != '(') { 170 tab->tokens[token].name[i] = *format; 171 i++; 172 format++; 173 if (i >= ARM64_MAX_TOKEN_LEN) { 174 printf("ERROR: token too long in op %s\n", 175 tab->name); 176 error = 1; 177 break; 178 } 179 } 180 if (error != 0) 181 break; 182 183 /* Read the length value */ 184 ret = sscanf(format, "(%d)", &len); 185 if (ret == 1) { 186 if (token >= ARM64_MAX_TOKEN_CNT) { 187 printf("ERROR: to many tokens in op %s\n", 188 tab->name); 189 error = 1; 190 break; 191 } 192 193 a -= len; 194 tab->tokens[token].pos = a + 1; 195 tab->tokens[token].len = len; 196 token++; 197 } 198 199 /* Skip to the end of the token */ 200 while (*format != 0 && *format != '|') 201 format++; 202 } 203 } 204 205 /* Write mask and pattern to the instruction array */ 206 tab->mask = mask; 207 tab->pattern = val; 208 209 /* 210 * If we got here, format string must be parsed and "a" 211 * should point to -1. If it's not, wrong number of bits 212 * in format string. Mark this as invalid and prevent 213 * from being matched. 214 */ 215 if (*format != 0 || (a != -1) || (error != 0)) { 216 tab->mask = 0; 217 tab->pattern = 0xffffffff; 218 printf("ERROR: skipping instruction op %s\n", 219 tab->name); 220 } 221 222 tab++; 223 } 224 } 225 226 static int 227 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode, 228 const char *token, int *val) 229 { 230 int i; 231 232 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) { 233 if (strcmp(insn->tokens[i].name, token) == 0) { 234 *val = (opcode >> insn->tokens[i].pos & 235 ((1 << insn->tokens[i].len) - 1)); 236 return (0); 237 } 238 } 239 240 return (EINVAL); 241 } 242 243 static const char * 244 arm64_reg(int b64, int num) 245 { 246 247 if (b64 != 0) 248 return (x_reg[num]); 249 250 return (w_reg[num]); 251 } 252 253 vm_offset_t 254 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt) 255 { 256 struct arm64_insn *i_ptr = arm64_i; 257 uint32_t insn; 258 int matchp; 259 int ret; 260 int shift, rm, rd, rn, imm, sf; 261 int rm_absent; 262 263 /* Initialize defaults, all are 0 except SF indicating 64bit access */ 264 shift = rd = rm = rn = imm = 0; 265 sf = 1; 266 267 matchp = 0; 268 insn = di->di_readword(loc); 269 while (i_ptr->name) { 270 /* If mask is 0 then the parser was not initialized yet */ 271 if ((i_ptr->mask != 0) && 272 ((insn & i_ptr->mask) == i_ptr->pattern)) { 273 matchp = 1; 274 break; 275 } 276 i_ptr++; 277 } 278 if (matchp == 0) 279 goto undefined; 280 281 switch (i_ptr->type) { 282 case TYPE_01: 283 /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 284 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ 285 286 /* Mandatory tokens */ 287 ret = arm64_disasm_read_token(i_ptr, insn, "SF", &sf); 288 ret |= arm64_disasm_read_token(i_ptr, insn, "RD", &rd); 289 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); 290 if (ret != 0) { 291 printf("ERROR: Missing mandatory token for op %s type %d\n", 292 i_ptr->name, i_ptr->type); 293 goto undefined; 294 } 295 296 /* Optional tokens */ 297 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm); 298 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift); 299 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); 300 301 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd), 302 arm64_reg(sf, rn)); 303 304 /* If RM is present use it, otherwise use immediate notation */ 305 if (rm_absent == 0) { 306 di->di_printf(", %s", arm64_reg(sf, rm)); 307 if (imm != 0) 308 di->di_printf(", %s #%d", shift_2[shift], imm); 309 } else { 310 if (imm != 0 || shift != 0) 311 di->di_printf(", #0x%x", imm); 312 if (shift != 0) 313 di->di_printf(" LSL #12"); 314 } 315 break; 316 default: 317 goto undefined; 318 } 319 320 di->di_printf("\n"); 321 return(loc + INSN_SIZE); 322 323 undefined: 324 di->di_printf("undefined\t%08x\n", insn); 325 return(loc + INSN_SIZE); 326 } 327 328 /* Parse format strings at the very beginning */ 329 SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, 330 SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i); 331