1 /*- 2 * Copyright (c) 2016 Cavium 3 * All rights reserved. 4 * 5 * This software was developed by Semihalf. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 35 #include <machine/armreg.h> 36 #include <machine/disassem.h> 37 38 #include <ddb/ddb.h> 39 40 #define ARM64_MAX_TOKEN_LEN 8 41 #define ARM64_MAX_TOKEN_CNT 10 42 43 #define ARM_INSN_SIZE_OFFSET 30 44 #define ARM_INSN_SIZE_MASK 0x3 45 46 /* Special options for instruction printing */ 47 #define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */ 48 #define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */ 49 #define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */ 50 #define OP_SF32 (1UL << 3) /* Force 32-bit access */ 51 #define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */ 52 53 static const char *w_reg[] = { 54 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", 55 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 56 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", 57 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP", 58 }; 59 60 static const char *x_reg[] = { 61 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 62 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 63 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 64 "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP", 65 }; 66 67 static const char *shift_2[] = { 68 "LSL", "LSR", "ASR", "RSV" 69 }; 70 71 /* 72 * Structure representing single token (operand) inside instruction. 73 * name - name of operand 74 * pos - position within the instruction (in bits) 75 * len - operand length (in bits) 76 */ 77 struct arm64_insn_token { 78 char name[ARM64_MAX_TOKEN_LEN]; 79 int pos; 80 int len; 81 }; 82 83 /* 84 * Define generic types for instruction printing. 85 */ 86 enum arm64_format_type { 87 /* 88 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #imm} SF32/64 89 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 90 */ 91 TYPE_01, 92 93 /* 94 * OP <RT>, [<RN>, #<imm>]{!} SF32/64 95 * OP <RT>, [<RN>], #<imm>{!} SF32/64 96 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } 97 */ 98 TYPE_02, 99 100 /* OP <RT>, #imm SF32/64 */ 101 TYPE_03, 102 }; 103 104 /* 105 * Structure representing single parsed instruction format. 106 * name - opcode name 107 * format - opcode format in a human-readable way 108 * type - syntax type for printing 109 * special_ops - special options passed to a printer (if any) 110 * mask - bitmask for instruction matching 111 * pattern - pattern to look for 112 * tokens - array of tokens (operands) inside instruction 113 */ 114 struct arm64_insn { 115 char *name; 116 char *format; 117 enum arm64_format_type type; 118 uint64_t special_ops; 119 uint32_t mask; 120 uint32_t pattern; 121 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT]; 122 }; 123 124 /* 125 * Specify instruction opcode format in a human-readable way. Use notation 126 * obtained from ARM Architecture Reference Manual for ARMv8-A. 127 * 128 * Format string description: 129 * Each group must be separated by "|". Group made of 0/1 is used to 130 * generate mask and pattern for instruction matching. Groups containing 131 * an operand token (in format NAME(length_bits)) are used to retrieve any 132 * operand data from the instruction. Names here must be meaningful 133 * and match the one described in the Manual. 134 * 135 * Token description: 136 * SF - "0" represents 32-bit access, "1" represents 64-bit access 137 * SHIFT - type of shift (instruction dependent) 138 * IMM - immediate value 139 * Rx - register number 140 * OPTION - command specific options 141 * SCALE - scaling of immediate value 142 */ 143 static struct arm64_insn arm64_i[] = { 144 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", 145 TYPE_01, 0 }, 146 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", 147 TYPE_01, 0 }, 148 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", 149 TYPE_01, 0 }, 150 { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", 151 TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */ 152 { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)", 153 TYPE_02, 0 }, /* ldr immediate unsigned */ 154 { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 155 TYPE_02, 0 }, /* ldr register */ 156 { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)", 157 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ 158 { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", 159 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */ 160 { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)", 161 TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */ 162 { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 163 TYPE_02, OP_SF32 }, /* ldrb register */ 164 { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02, 165 OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */ 166 { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)", 167 TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */ 168 { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 169 TYPE_02, OP_SF32 }, /* ldrh register */ 170 { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", 171 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */ 172 { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\ 173 TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */ 174 { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 175 TYPE_02, OP_SF_INV }, /* ldrsb register */ 176 { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)", 177 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */ 178 { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)", 179 TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */ 180 { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 181 TYPE_02, OP_SF_INV }, /* ldrsh register */ 182 { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)", 183 TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */ 184 { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)", 185 TYPE_02, 0 }, /* ldrsw immediate unsigned */ 186 { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 187 TYPE_02, 0 }, /* ldrsw register */ 188 { "ldrsw", "10011000|IMM(19)|RT(5)", 189 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */ 190 { "str", "1|SF(1)|111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)", 191 TYPE_02, OP_SIGN_EXT }, /* str immediate post/pre index */ 192 { "str", "1|SF(1)|11100100|IMM(12)|RN(5)|RT(5)", 193 TYPE_02, 0 }, /* str immediate unsigned */ 194 { "str", "1|SF(1)|111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 195 TYPE_02, 0 }, /* str register */ 196 { "strb", "00111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)", 197 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* strb immediate post/pre index */ 198 { "strb", "0011100100|IMM(12)|RN(5)|RT(5)", 199 TYPE_02, OP_SF32 }, /* strb immediate unsigned */ 200 { "strb", "00111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 201 TYPE_02, OP_SF32 }, /* strb register */ 202 { "strh", "01111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)", 203 TYPE_02, OP_SF32 | OP_SIGN_EXT }, /* strh immediate post/pre index */ 204 { "strh", "0111100100|IMM(12)|RN(5)|RT(5)", 205 TYPE_02, OP_SF32 }, /* immediate unsigned */ 206 { "strh", "01111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)", 207 TYPE_02, OP_SF32 }, /* strh register */ 208 { NULL, NULL } 209 }; 210 211 static void 212 arm64_disasm_generate_masks(struct arm64_insn *tab) 213 { 214 uint32_t mask, val; 215 int a, i; 216 int len, ret; 217 int token = 0; 218 char *format; 219 int error; 220 221 while (tab->name != NULL) { 222 mask = 0; 223 val = 0; 224 format = tab->format; 225 token = 0; 226 error = 0; 227 228 /* 229 * For each entry analyze format strings from the 230 * left (i.e. from the MSB). 231 */ 232 a = (INSN_SIZE * NBBY) - 1; 233 while (*format != '\0' && (a >= 0)) { 234 switch (*format) { 235 case '0': 236 /* Bit is 0, add to mask and pattern */ 237 mask |= (1 << a); 238 a--; 239 format++; 240 break; 241 case '1': 242 /* Bit is 1, add to mask and pattern */ 243 mask |= (1 << a); 244 val |= (1 << a); 245 a--; 246 format++; 247 break; 248 case '|': 249 /* skip */ 250 format++; 251 break; 252 default: 253 /* Token found, copy the name */ 254 memset(tab->tokens[token].name, 0, 255 sizeof(tab->tokens[token].name)); 256 i = 0; 257 while (*format != '(') { 258 tab->tokens[token].name[i] = *format; 259 i++; 260 format++; 261 if (i >= ARM64_MAX_TOKEN_LEN) { 262 printf("ERROR: " 263 "token too long in op %s\n", 264 tab->name); 265 error = 1; 266 break; 267 } 268 } 269 if (error != 0) 270 break; 271 272 /* Read the length value */ 273 ret = sscanf(format, "(%d)", &len); 274 if (ret == 1) { 275 if (token >= ARM64_MAX_TOKEN_CNT) { 276 printf("ERROR: " 277 "too many tokens in op %s\n", 278 tab->name); 279 error = 1; 280 break; 281 } 282 283 a -= len; 284 tab->tokens[token].pos = a + 1; 285 tab->tokens[token].len = len; 286 token++; 287 } 288 289 /* Skip to the end of the token */ 290 while (*format != 0 && *format != '|') 291 format++; 292 } 293 } 294 295 /* Write mask and pattern to the instruction array */ 296 tab->mask = mask; 297 tab->pattern = val; 298 299 /* 300 * If we got here, format string must be parsed and "a" 301 * should point to -1. If it's not, wrong number of bits 302 * in format string. Mark this as invalid and prevent 303 * from being matched. 304 */ 305 if (*format != 0 || (a != -1) || (error != 0)) { 306 tab->mask = 0; 307 tab->pattern = 0xffffffff; 308 printf("ERROR: skipping instruction op %s\n", 309 tab->name); 310 } 311 312 tab++; 313 } 314 } 315 316 static int 317 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode, 318 const char *token, int *val) 319 { 320 int i; 321 322 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) { 323 if (strcmp(insn->tokens[i].name, token) == 0) { 324 *val = (opcode >> insn->tokens[i].pos & 325 ((1 << insn->tokens[i].len) - 1)); 326 return (0); 327 } 328 } 329 330 return (EINVAL); 331 } 332 333 static int 334 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode, 335 const char *token, int *val) 336 { 337 int i; 338 int msk; 339 340 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) { 341 if (strcmp(insn->tokens[i].name, token) == 0) { 342 msk = (1 << insn->tokens[i].len) - 1; 343 *val = ((opcode >> insn->tokens[i].pos) & msk); 344 345 /* If last bit is 1, sign-extend the value */ 346 if (*val & (1 << (insn->tokens[i].len - 1))) 347 *val |= ~msk; 348 349 return (0); 350 } 351 } 352 353 return (EINVAL); 354 } 355 356 static const char * 357 arm64_reg(int b64, int num) 358 { 359 360 if (b64 != 0) 361 return (x_reg[num]); 362 363 return (w_reg[num]); 364 } 365 366 vm_offset_t 367 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt) 368 { 369 struct arm64_insn *i_ptr = arm64_i; 370 uint32_t insn; 371 int matchp; 372 int ret; 373 int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount; 374 int sign_ext; 375 int rm_absent; 376 /* Indicate if immediate should be outside or inside brackets */ 377 int inside; 378 /* Print exclamation mark if pre-incremented */ 379 int pre; 380 381 /* Initialize defaults, all are 0 except SF indicating 64bit access */ 382 shift = rd = rm = rn = imm = idx = option = amount = scale = 0; 383 sign_ext = 0; 384 sf = 1; 385 386 matchp = 0; 387 insn = di->di_readword(loc); 388 while (i_ptr->name) { 389 /* If mask is 0 then the parser was not initialized yet */ 390 if ((i_ptr->mask != 0) && 391 ((insn & i_ptr->mask) == i_ptr->pattern)) { 392 matchp = 1; 393 break; 394 } 395 i_ptr++; 396 } 397 if (matchp == 0) 398 goto undefined; 399 400 /* Global options */ 401 if (i_ptr->special_ops & OP_SF32) 402 sf = 0; 403 404 /* Global optional tokens */ 405 arm64_disasm_read_token(i_ptr, insn, "SF", &sf); 406 if (i_ptr->special_ops & OP_SF_INV) 407 sf = 1 - sf; 408 if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0) 409 sign_ext = 1 - sign_ext; 410 if (i_ptr->special_ops & OP_SIGN_EXT) 411 sign_ext = 1; 412 if (sign_ext != 0) 413 arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm); 414 else 415 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm); 416 if (i_ptr->special_ops & OP_MULT_4) 417 imm <<= 2; 418 419 /* Print opcode by type */ 420 switch (i_ptr->type) { 421 case TYPE_01: 422 /* 423 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64 424 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 425 */ 426 427 /* Mandatory tokens */ 428 ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd); 429 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); 430 if (ret != 0) { 431 printf("ERROR: " 432 "Missing mandatory token for op %s type %d\n", 433 i_ptr->name, i_ptr->type); 434 goto undefined; 435 } 436 437 /* Optional tokens */ 438 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift); 439 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); 440 441 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd), 442 arm64_reg(sf, rn)); 443 444 /* If RM is present use it, otherwise use immediate notation */ 445 if (rm_absent == 0) { 446 di->di_printf(", %s", arm64_reg(sf, rm)); 447 if (imm != 0) 448 di->di_printf(", %s #%d", shift_2[shift], imm); 449 } else { 450 if (imm != 0 || shift != 0) 451 di->di_printf(", #0x%x", imm); 452 if (shift != 0) 453 di->di_printf(" LSL #12"); 454 } 455 break; 456 case TYPE_02: 457 /* 458 * OP <RT>, [<RN>, #<imm>]{!}] SF32/64 459 * OP <RT>, [<RN>], #<imm>{!} SF32/64 460 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT } 461 */ 462 463 /* Mandatory tokens */ 464 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt); 465 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn); 466 if (ret != 0) { 467 printf("ERROR: " 468 "Missing mandatory token for op %s type %d\n", 469 i_ptr->name, i_ptr->type); 470 goto undefined; 471 } 472 473 /* Optional tokens */ 474 arm64_disasm_read_token(i_ptr, insn, "OPTION", &option); 475 arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale); 476 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm); 477 478 if (rm_absent) { 479 /* 480 * In unsigned operation, shift immediate value 481 * and reset options to default. 482 */ 483 if (sign_ext == 0) { 484 imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) & 485 ARM_INSN_SIZE_MASK); 486 option = 0; 487 } 488 switch (option) { 489 case 0x0: 490 pre = 0; 491 inside = 1; 492 break; 493 case 0x1: 494 pre = 0; 495 inside = 0; 496 break; 497 case 0x2: 498 default: 499 pre = 1; 500 inside = 1; 501 break; 502 } 503 504 di->di_printf("%s\t%s, ", i_ptr->name, 505 arm64_reg(sf, rt)); 506 if (inside != 0) { 507 di->di_printf("[%s", arm64_reg(1, rn)); 508 if (imm != 0) 509 di->di_printf(", #%d", imm); 510 di->di_printf("]"); 511 } else { 512 di->di_printf("[%s]", arm64_reg(1, rn)); 513 if (imm != 0) 514 di->di_printf(", #%d", imm); 515 } 516 if (pre != 0) 517 di->di_printf("!"); 518 } else { 519 /* Last bit of option field determines 32/64 bit offset */ 520 di->di_printf("%s\t%s, [%s, %s", i_ptr->name, 521 arm64_reg(sf, rt), arm64_reg(1, rn), 522 arm64_reg(option & 1, rm)); 523 524 if (scale == 0) 525 amount = 0; 526 else { 527 /* Calculate amount, it's op(31:30) */ 528 amount = (insn >> ARM_INSN_SIZE_OFFSET) & 529 ARM_INSN_SIZE_MASK; 530 } 531 532 switch (option) { 533 case 0x2: 534 di->di_printf(", uxtw #%d", amount); 535 break; 536 case 0x3: 537 if (scale != 0) 538 di->di_printf(", lsl #%d", amount); 539 break; 540 case 0x6: 541 di->di_printf(", sxtw #%d", amount); 542 break; 543 case 0x7: 544 di->di_printf(", sxtx #%d", amount); 545 break; 546 default: 547 di->di_printf(", RSVD"); 548 break; 549 } 550 di->di_printf("]"); 551 } 552 553 break; 554 555 case TYPE_03: 556 /* OP <RT>, #imm SF32/64 */ 557 558 /* Mandatory tokens */ 559 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt); 560 if (ret != 0) { 561 printf("ERROR: " 562 "Missing mandatory token for op %s type %d\n", 563 i_ptr->name, i_ptr->type); 564 goto undefined; 565 } 566 567 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt)); 568 if (i_ptr->special_ops & OP_LITERAL) 569 di->di_printf("0x%lx", loc + imm); 570 else 571 di->di_printf("#%d", imm); 572 573 break; 574 default: 575 goto undefined; 576 } 577 578 di->di_printf("\n"); 579 return (loc + INSN_SIZE); 580 581 undefined: 582 di->di_printf("undefined\t%08x\n", insn); 583 return (loc + INSN_SIZE); 584 } 585 586 /* Parse format strings at the very beginning */ 587 SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST, 588 arm64_disasm_generate_masks, arm64_i); 589