#!/bin/awk -f
# SPDX-License-Identifier: GPL-2.0
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
#
# The generated C source is written to stdout; every diagnostic goes to
# /dev/stderr so that it can never end up inside the generated file.

# Awk implementation sanity check.
# Returns an error message, or "" when the implementation looks usable.
function check_awk_implement() {
	if (sprintf("%x", 0) != "0")
		return "Your awk has a printf-format problem."
	return ""
}

# Clear working vars: reset the per-table state so the next table
# section starts from scratch.
function clear_vars() {
	delete table
	delete lptable2
	delete lptable1
	delete lptable3
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
	tname = ""
}

BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to hand out
	geid = 1	# next escape id to hand out
	gaid = 0	# one past the highest AVX id seen so far
	delete etable
	delete gtable
	delete atable

	# Token classifiers used while parsing one opcode entry
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate / moffset operands and the flag each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
	rex2_expr = "\\(REX2\\)"
	no_rex2_expr = "\\(!REX2\\)"
	fpu_expr = "^ESC" # TODO

	# Superscripts that route an entry into one of the last-prefix tables
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accepts VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript supports *only* VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with (ev) superscript supports *only* EVEX prefix
	evexonly_expr = "\\(ev\\)"
	# (es) is the same as (ev) but also "SCALABLE" i.e. W and pp determine operand size
	evex_scalable_expr = "\\(es\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"
	prefix_num["REX2"] = "INAT_PFX_REX2"

	clear_vars()
}

# Report a fatal problem in the opcode map and stop.
# 'exit' still runs the END rule, so remember the failure: END checks
# semantic_failed and must not append the index arrays to a broken output.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	semantic_failed = 1
	exit 1
}

# Debug helper. Writes to stderr because stdout is the generated C file.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}

# Return the number of elements in arr (i and c are locals).
function array_size(arr,   i,c) {
	c = 0
	for (i in arr)
		c++
	return c
}

# A new table section must not begin while the previous one is still open.
/^Table:/ {
	print "/* " $0 " */"
	if (tname != "")
		semantic_error("Hit Table: before EndTable:.");
}

# "Referrer: <escape name>" binds the table to an escape id that was
# registered earlier by an "escape" opcode entry.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unregistered name would yield an empty id and a table that
		# the escape index array never points at; fail loudly instead.
		if (!(ref in escape))
			semantic_error("Unknown escape referrer: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
	}
	if (aid == -1 && eid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

/^GrpTable:/ {
	print "/* " $0 " */"
	if (!($2 in group))
		semantic_error("No group: " $2 )
	gid = group[$2]
	tname = "inat_group_table_" gid
}

# Emit one C array definition.
#   tbl  - attribute strings keyed by the formatted index
#   name - C declarator, including the array size suffix
#   fmt  - sprintf format that turns 0..n-1 into a key of tbl
#   n    - number of slots to scan
# i and id are locals so the globals used by the rules are left untouched.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}

# End of a table section: print the base table and each non-empty
# last-prefix variant, record their names for the index arrays printed
# in END, then reset the per-table state.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	} else {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}

# Join two flag expressions with " | ", tolerating an empty side.
function add_flags(old,new) {
	if (old && new)
		return old " | " new
	else if (old)
		return old
	else
		return new
}

# convert operands to flags.
#   count - number of entries in opnd
#   opnd  - operand strings, indexed from 1
# Returns the immediate flag (if any) combined with INAT_MODRM (if any).
function convert_operands(count,opnd,       i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				# only "Ib" is accepted as a second immediate
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}

# One opcode entry: "<hex index>: <opcode> [operands] [(ext)] [| ...]"
/^[0-9a-f]+:/ {
	# the first input line is never treated as an opcode entry
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX2 not allowed
		if (match(ext, no_rex2_expr))
			flags = add_flags(flags, "INAT_NO_REX2")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, evex_scalable_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY | INAT_EVEX_SCALABLE")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this opcode
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, rex2_expr))
			table[idx] = add_flags(table[idx], "INAT_REX2_VARIANT")
		# entries without a 66/F2/F3 superscript go into the base table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when any last-prefix variant was recorded
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}

END {
	# END also runs after 'exit' in BEGIN or in a rule; do not append the
	# index arrays when the run has already failed.
	if (awkchecked != "" || semantic_failed)
		exit 1

	print "#ifndef __BOOT_COMPRESSED\n"

	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print " ["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print " ["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print " ["i"]["j"] = "atable[i,j]","
	print "};\n"

	print "#else /* !__BOOT_COMPRESSED */\n"

	print "/* Escape opcode map array */"
	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* Group opcode map array */"
	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* AVX opcode map array */"
	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "static void inat_init_tables(void)"
	print "{"

	# print escape opcode map's array
	print "\t/* Print Escape opcode map array */"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
	print ""

	# print group opcode map's array
	print "\t/* Print Group opcode map array */"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
	print ""
	# print AVX opcode map's array
	print "\t/* Print AVX opcode map array */"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"

	print "}"
	print "#endif"
}