1 /*- 2 * Copyright (c) 2016 Cavium 3 * All rights reserved. 4 * 5 * This software was developed by Semihalf. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 #include <sys/param.h> 32 33 #include <sys/systm.h> 34 #include <machine/disassem.h> 35 #include <machine/armreg.h> 36 #include <ddb/ddb.h> 37 38 #define ARM64_MAX_TOKEN_LEN 8 39 #define ARM64_MAX_TOKEN_CNT 10 40 41 #define ARM_INSN_SIZE_OFFSET 30 42 #define ARM_INSN_SIZE_MASK 0x3 43 44 /* Special options for instruction printing */ 45 #define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */ 46 #define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */ 47 #define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */ 48 #define OP_SF32 (1UL << 3) /* Force 32-bit access */ 49 #define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */ 50 51 static const char *w_reg[] = { 52 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 53 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 54 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 55 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP", 56 }; 57 58 static const char *x_reg[] = { 59 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 60 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 61 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 62 "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP", 63 }; 64 65 static const char *shift_2[] = { 66 "LSL", "LSR", "ASR", "RSV" 67 }; 68 69 /* 70 * Structure representing single token (operand) inside instruction. 71 * name - name of operand 72 * pos - position within the instruction (in bits) 73 * len - operand length (in bits) 74 */ 75 struct arm64_insn_token { 76 char name[ARM64_MAX_TOKEN_LEN]; 77 int pos; 78 int len; 79 }; 80 81 /* 82 * Define generic types for instruction printing. 83 */ 84 enum arm64_format_type { 85 TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 86 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ 87 TYPE_02, /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64 88 OP <RT>, [<RN>], #<imm>{!} SF32/64 89 OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */ 90 TYPE_03, /* OP <RT>, #imm SF32/64 */ 91 }; 92 93 /* 94 * Structure representing single parsed instruction format. 95 * name - opcode name 96 * format - opcode format in a human-readable way 97 * type - syntax type for printing 98 * special_ops - special options passed to a printer (if any) 99 * mask - bitmask for instruction matching 100 * pattern - pattern to look for 101 * tokens - array of tokens (operands) inside instruction 102 */ 103 struct arm64_insn { 104 char* name; 105 char* format; 106 enum arm64_format_type type; 107 uint64_t special_ops; 108 uint32_t mask; 109 uint32_t pattern; 110 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT]; 111 }; 112 113 /* 114 * Specify instruction opcode format in a human-readable way. Use notation 115 * obtained from ARM Architecture Reference Manual for ARMv8-A. 116 * 117 * Format string description: 118 * Each group must be separated by "|". Group made of 0/1 is used to 119 * generate mask and pattern for instruction matching. Groups containing 120 * an operand token (in format NAME(length_bits)) are used to retrieve any 121 * operand data from the instruction. Names here must be meaningful 122 * and match the one described in the Manual. 123 * 124 * Token description: 125 * SF - "0" represents 32-bit access, "1" represents 64-bit access 126 * SHIFT - type of shift (instruction dependent) 127 * IMM - immediate value 128 * Rx - register number 129 * OPTION - command specific options 130 * SCALE - scaling of immediate value 131 */ 132 static struct arm64_insn arm64_i[] = { 133 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", 134 TYPE_01, 0 }, 135 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", 136 TYPE_01, 0 }, 137 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", 138 TYPE_01, 0 }, 139 { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", 140 TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */ 141 { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)", 142 TYPE_02, 0 }, /* ldr immediate unsigned */ 143 { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 144 TYPE_02, 0 }, /* ldr register */ 145 { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)", 146 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ 147 { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", 148 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */ 149 { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)", 150 TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */ 151 { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 152 TYPE_02, OP_SF32 }, /* ldrb register */ 153 { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02, 154 OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */ 155 { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)", 156 TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */ 157 { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 158 TYPE_02, OP_SF32 }, /* ldrh register */ 159 { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", 160 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */ 161 { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\ 162 TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */ 163 { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 164 TYPE_02, OP_SF_INV }, /* ldrsb register */ 165 { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", 166 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */ 167 { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)", 168 TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */ 169 { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 170 TYPE_02, OP_SF_INV }, /* ldrsh register */ 171 { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)", 172 TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */ 173 { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)", 174 TYPE_02, 0 }, /* ldrsw immediate unsigned */ 175 { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 176 TYPE_02, 0 }, /* ldrsw register */ 177 { "ldrsw", "10011000|IMM(19)|RT(5)", 178 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ 179 { NULL, NULL } 180 }; 181 182 static void 183 arm64_disasm_generate_masks(struct arm64_insn *tab) 184 { 185 uint32_t mask, val; 186 int a, i; 187 int len, ret; 188 int token = 0; 189 char *format; 190 int error; 191 192 while (tab->name != NULL) { 193 mask = 0; 194 val = 0; 195 format = tab->format; 196 token = 0; 197 error = 0; 198 199 /* 200 * For each entry analyze format strings from the 201 * left (i.e. from the MSB). 202 */ 203 a = (INSN_SIZE * NBBY) - 1; 204 while (*format != '\0' && (a >= 0)) { 205 switch(*format) { 206 case '0': 207 /* Bit is 0, add to mask and pattern */ 208 mask |= (1 << a); 209 a--; 210 format++; 211 break; 212 case '1': 213 /* Bit is 1, add to mask and pattern */ 214 mask |= (1 << a); 215 val |= (1 << a); 216 a--; 217 format++; 218 break; 219 case '|': 220 /* skip */ 221 format++; 222 break; 223 default: 224 /* Token found, copy the name */ 225 memset(tab->tokens[token].name, 0, 226 sizeof(tab->tokens[token].name)); 227 i = 0; 228 while (*format != '(') { 229 tab->tokens[token].name[i] = *format; 230 i++; 231 format++; 232 if (i >= ARM64_MAX_TOKEN_LEN) { 233 printf("ERROR: token too long in op %s\n", 234 tab->name); 235 error = 1; 236 break; 237 } 238 } 239 if (error != 0) 240 break; 241 242 /* Read the length value */ 243 ret = sscanf(format, "(%d)", &len); 244 if (ret == 1) { 245 if (token >= ARM64_MAX_TOKEN_CNT) { 246 printf("ERROR: to many tokens in op %s\n", 247 tab->name); 248 error = 1; 249 break; 250 } 251 252 a -= len; 253 tab->tokens[token].pos = a + 1; 254 tab->tokens[token].len = len; 255 token++; 256 } 257 258 /* Skip to the end of the token */ 259 while (*format != 0 && *format != '|') 260 format++; 261 } 262 } 263 264 /* Write mask and pattern to the instruction array */ 265 tab->mask = mask; 266 tab->pattern = val; 267 268 /* 269 * If we got here, format string must be parsed and "a" 270 * should point to -1. If it's not, wrong number of bits 271 * in format string. Mark this as invalid and prevent 272 * from being matched. 273 */ 274 if (*format != 0 || (a != -1) || (error != 0)) { 275 tab->mask = 0; 276 tab->pattern = 0xffffffff; 277 printf("ERROR: skipping instruction op %s\n", 278 tab->name); 279 } 280 281 tab++; 282 } 283 } 284 285 static int 286 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode, 287 const char *token, int *val) 288 { 289 int i; 290 291 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) { 292 if (strcmp(insn->tokens[i].name, token) == 0) { 293 *val = (opcode >> insn->tokens[i].pos & 294 ((1 << insn->tokens[i].len) - 1)); 295 return (0); 296 } 297 } 298 299 return (EINVAL); 300 } 301 302 static int 303 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode, 304 const char *token, int *val) 305 { 306 int i; 307 int msk; 308 309 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) { 310 if (strcmp(insn->tokens[i].name, token) == 0) { 311 msk = (1 << insn->tokens[i].len) - 1; 312 *val = ((opcode >> insn->tokens[i].pos) & msk); 313 314 /* If last bit is 1, sign-extend the value */ 315 if (*val & (1 << (insn->tokens[i].len - 1))) 316 *val |= ~msk; 317 318 return (0); 319 } 320 } 321 322 return (EINVAL); 323 } 324 325 static const char * 326 arm64_reg(int b64, int num) 327 { 328 329 if (b64 != 0) 330 return (x_reg[num]); 331 332 return (w_reg[num]); 333 } 334 335 vm_offset_t 336 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt) 337 { 338 struct arm64_insn *i_ptr = arm64_i; 339 uint32_t insn; 340 int matchp; 341 int ret; 342 int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount; 343 int sign_ext; 344 int rm_absent; 345 /* Indicate if immediate should be outside or inside brackets */ 346 int inside; 347 /* Print exclamation mark if pre-incremented */ 348 int pre; 349 350 /* Initialize defaults, all are 0 except SF indicating 64bit access */ 351 shift = rd = rm = rn = imm = idx = option = amount = scale = 0; 352 sign_ext = 0; 353 sf = 1; 354 355 matchp = 0; 356 insn = di->di_readword(loc); 357 while (i_ptr->name) { 358 /* If mask is 0 then the parser was not initialized yet */ 359 if ((i_ptr->mask != 0) && 360 ((insn & i_ptr->mask) == i_ptr->pattern)) { 361 matchp = 1; 362 break; 363 } 364 i_ptr++; 365 } 366 if (matchp == 0) 367 goto undefined; 368 369 /* Global options */ 370 if (i_ptr->special_ops & OP_SF32) 371 sf = 0; 372 373 /* Global optional tokens */ 374 arm64_disasm_read_token(i_ptr, insn, "SF", &sf); 375 if (i_ptr->special_ops & OP_SF_INV) 376 sf = 1 - sf; 377 if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0) 378 sign_ext = 1 - sign_ext; 379 if (i_ptr->special_ops & OP_SIGN_EXT) 380 sign_ext = 1; 381 if (sign_ext != 0) 382 arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm); 383 else 384 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm); 385 if (i_ptr->special_ops & OP_MULT_4) 386 imm <<= 2; 387 388 /* Print opcode by type */ 389 switch (i_ptr->type) { 390 case TYPE_01: 391 /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 392 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */ 393 394 /* Mandatory tokens */ 395 ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd); 396 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); 397 if (ret != 0) { 398 printf("ERROR: Missing mandatory token for op %s type %d\n", 399 i_ptr->name, i_ptr->type); 400 goto undefined; 401 } 402 403 /* Optional tokens */ 404 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift); 405 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); 406 407 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd), 408 arm64_reg(sf, rn)); 409 410 /* If RM is present use it, otherwise use immediate notation */ 411 if (rm_absent == 0) { 412 di->di_printf(", %s", arm64_reg(sf, rm)); 413 if (imm != 0) 414 di->di_printf(", %s #%d", shift_2[shift], imm); 415 } else { 416 if (imm != 0 || shift != 0) 417 di->di_printf(", #0x%x", imm); 418 if (shift != 0) 419 di->di_printf(" LSL #12"); 420 } 421 break; 422 case TYPE_02: 423 /* OP <RT>, [<RN>, #<imm>]{!}] SF32/64 424 OP <RT>, [<RN>], #<imm>{!} SF32/64 425 OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } */ 426 427 /* Mandatory tokens */ 428 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt); 429 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); 430 if (ret != 0) { 431 printf("ERROR: Missing mandatory token for op %s type %d\n", 432 i_ptr->name, i_ptr->type); 433 goto undefined; 434 } 435 436 /* Optional tokens */ 437 arm64_disasm_read_token(i_ptr, insn, "OPTION", &option); 438 arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale); 439 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); 440 441 if (rm_absent) { 442 /* 443 * In unsigned operation, shift immediate value 444 * and reset options to default. 445 */ 446 if (sign_ext == 0) { 447 imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) & 448 ARM_INSN_SIZE_MASK); 449 option = 0; 450 } 451 switch (option) { 452 case 0x0: 453 pre = 0; 454 inside = 1; 455 break; 456 case 0x1: 457 pre = 0; 458 inside = 0; 459 break; 460 case 0x2: 461 default: 462 pre = 1; 463 inside = 1; 464 break; 465 } 466 467 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt)); 468 if (inside != 0) { 469 di->di_printf("[%s", arm64_reg(1, rn)); 470 if (imm != 0) 471 di->di_printf(", #%d", imm); 472 di->di_printf("]"); 473 } else { 474 di->di_printf("[%s]", arm64_reg(1, rn)); 475 if (imm != 0) 476 di->di_printf(", #%d", imm); 477 } 478 if (pre != 0) 479 di->di_printf("!"); 480 } else { 481 /* Last bit of option field determines 32/64 bit offset */ 482 di->di_printf("%s\t%s, [%s, %s", i_ptr->name, 483 arm64_reg(sf, rt), arm64_reg(1, rn), 484 arm64_reg(option & 1, rm)); 485 486 /* Calculate amount, it's op(31:30) */ 487 amount = (insn >> ARM_INSN_SIZE_OFFSET) & 488 ARM_INSN_SIZE_MASK; 489 490 switch (option) { 491 case 0x2: 492 di->di_printf(", uxtw #%d", amount); 493 break; 494 case 0x3: 495 if (scale != 0) 496 di->di_printf(", lsl #%d", amount); 497 break; 498 case 0x6: 499 di->di_printf(", sxtw #%d", amount); 500 break; 501 case 0x7: 502 di->di_printf(", sxts #%d", amount); 503 break; 504 default: 505 di->di_printf(", RSVD"); 506 break; 507 } 508 di->di_printf("]"); 509 } 510 511 break; 512 513 case TYPE_03: 514 /* OP <RT>, #imm SF32/64 */ 515 516 /* Mandatory tokens */ 517 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt); 518 if (ret != 0) { 519 printf("ERROR: Missing mandatory token for op %s type %d\n", 520 i_ptr->name, i_ptr->type); 521 goto undefined; 522 } 523 524 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt)); 525 if (i_ptr->special_ops & OP_LITERAL) 526 di->di_printf("0x%lx", loc + imm); 527 else 528 di->di_printf("#%d", imm); 529 530 break; 531 default: 532 goto undefined; 533 } 534 535 di->di_printf("\n"); 536 return(loc + INSN_SIZE); 537 538 undefined: 539 di->di_printf("undefined\t%08x\n", insn); 540 return(loc + INSN_SIZE); 541 } 542 543 /* Parse format strings at the very beginning */ 544 SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, 545 SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i); 546