xref: /linux/arch/x86/tools/gen-insn-attr-x86.awk (revision bfb4a6c721517a11b277e8841f8a7a64b1b14b72)
1#!/bin/awk -f
2# SPDX-License-Identifier: GPL-2.0
3# gen-insn-attr-x86.awk: Instruction attribute table generator
4# Written by Masami Hiramatsu <mhiramat@redhat.com>
5#
6# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
7
# Sanity-check the awk implementation before anything is generated.
# Returns "" when sprintf renders hexadecimal zero as "0", otherwise a
# message describing the problem.
function check_awk_implement() {
	return (sprintf("%x", 0) == "0") ? "" : "Your awk has a printf-format problem."
}
14
# Reset the per-table working state so the next table starts from scratch.
function clear_vars() {
	# no table is open
	tname = ""
	# -1 means "not set" for the escape id, group id and AVX id
	eid = -1
	gid = -1
	aid = -1
	# forget the attributes gathered for the base table and for the
	# three last-prefix variant tables
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
}
26
BEGIN {
	# Stop right away if this awk cannot format numbers the way the
	# generator needs; END sees awkchecked and exits as well.
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Banner of the generated file
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"

	# Next free group id / escape id, and one past the highest AVX id seen
	ggid = geid = 1
	gaid = 0

	# Names of the emitted tables, indexed by [id, last-prefix index]
	delete etable
	delete gtable
	delete atable

	# Token classes on an opcode line
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate operands and the attribute each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = imm_flag["Jb"] = imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = imm_flag["Ov"] = "INAT_MOFFSET"

	# Operand and extension markers tested by the opcode-line rule
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	invalid64_expr = "\\(i64\\)"
	only64_expr = "\\(o64\\)"
	rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
	rex2_expr = "\\(REX2\\)"
	no_rex2_expr = "\\(!REX2\\)"
	fpu_expr = "^ESC" # TODO

	# Last-prefix markers: which variant table an entry is stored in
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# VEX is accepted by every opcode whose mnemonic starts with a
	# lower-case 'v' or 'k', and by those marked with the (v1) superscript
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# (v): VEX prefix only
	vexonly_expr = "\\(v\\)"
	# (ev): EVEX prefix only
	evexonly_expr = "\\(ev\\)"
	# (es): like (ev) and additionally "SCALABLE", i.e. W and pp
	# determine the operand size
	evex_scalable_expr = "\\(es\\)"

	# Prefix names as written in the map and their attribute constants
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"
	prefix_num["REX2"] = "INAT_PFX_REX2"

	clear_vars()
}
112
# Report a fatal inconsistency in the opcode map and stop.
#   msg: description of the problem; it is written to stderr prefixed with
#        the current input record number (NR).
# Does not return: the program exits with status 1.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	# "exit" issued while processing input still runs the END rule.  END
	# stops immediately when awkchecked is non-empty, so flag the failure
	# there to keep the index arrays from being appended to a table file
	# that was cut short.
	awkchecked = "semantic error"
	exit 1
}
117
# Print a debugging message.
#   msg: text shown after the "DEBUG: " tag.
# Goes to stderr, like semantic_error(), because stdout carries the
# generated C source and must not be mixed with diagnostics.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
121
# Count the elements of an array.
#   arr: array to inspect
# Returns the number of indices present in arr (0 for an empty array).
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
128
# "Table:" line: copy it into the output as a C comment.  tname is only
# cleared when a table ends, so a non-empty tname here means the previous
# table was never closed.
/^Table:/ {
	printf("/* %s */\n", $0)
	if (tname != "") {
		semantic_error("Hit Table: before EndTable:.")
	}
}
134
# "Referrer:" line: when it names an escape opcode, the table being read is
# the one that escape leads to.  The name is every field after the keyword
# joined without separators, the same way it is stored when an escape
# opcode line is parsed.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An escape that was never defined has no id; without this
		# check the table would silently be emitted as escape table 0.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
145
# "AVXcode:" line: an optional AVX map id for the current table.  A table
# with neither an AVX id nor an escape id is the primary opcode table.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		# gaid stays one above the largest AVX id seen so far
		if (aid >= gaid)
			gaid = aid + 1
		# a name given by a Referrer: line wins; otherwise this is an
		# AVX-only table
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", aid)
	}
	if (eid == -1 && aid == -1)
		tname = "inat_primary_table"
}
158
# "GrpTable:" line: start the table of a group that an earlier opcode line
# has already registered in group[].
/^GrpTable:/ {
	printf("/* %s */\n", $0)
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else
		semantic_error("No group: " $2 )
}
166
# Print one attribute table as a C array definition.
#   tbl:  attributes indexed by the formatted slot number
#   name: C declarator of the array, including its size in brackets
#   fmt:  sprintf format that turns a slot number into an index of tbl
#   n:    number of slots to walk (0 .. n-1)
# Only slots holding a non-empty value are emitted, as designated
# initializers.  i and id are locals so the caller's loop counter is not
# clobbered, and slots are probed with "in" so that walking the table does
# not create empty elements in tbl.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
177
# Emit one attribute table of the table that is being closed, unless it is
# empty, and record its C name in the matching lookup array.
#   tbl:    attribute array to print
#   suffix: text appended to tname ("" for the base table, "_1".."_3" for
#           the last-prefix variants)
#   lp:     last-prefix index the table is recorded under
# Group tables (gid set) have 8 slots and go to gtable; every other table
# has 256 slots and goes to etable, plus atable when an AVX id is set.
function emit_attr_table(tbl, suffix, lp,   cname)
{
	if (array_size(tbl) == 0)
		return
	cname = tname suffix
	if (gid != -1) {
		print_table(tbl, cname "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
		gtable[gid,lp] = cname
	} else {
		print_table(tbl, cname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
		etable[eid,lp] = cname
		if (aid >= 0)
			atable[aid,lp] = cname
	}
}

# "EndTable" line: write out the base table and its three last-prefix
# variants, then reset the working state for the next table.
/^EndTable/ {
	emit_attr_table(table, "", 0)
	emit_attr_table(lptable1, "_1", 1)
	emit_attr_table(lptable2, "_2", 2)
	emit_attr_table(lptable3, "_3", 3)
	print ""
	clear_vars()
}
235
# Combine two attribute expressions.
#   old, new: flag expressions; either may be empty
# Returns "old | new" when both are set, otherwise whichever one is set
# (or an empty value when neither is).
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
244
# Translate the operands of one opcode into attribute flags.
#   count: number of operands
#   opnd:  the operands, indexed 1..count
# Returns the immediate attribute (if an operand matches imm_expr) combined
# with INAT_MODRM (if a non-immediate operand matches modrm_expr); empty
# when neither applies.
function convert_operands(count,opnd,       op,n,immflags,modrm)
{
	immflags = null
	modrm = null
	for (n = 1; n <= count; n++) {
		op = opnd[n]
		if (match(op, imm_expr) != 1) {
			# not an immediate; it may still set INAT_MODRM
			if (match(op, modrm_expr))
				modrm = "INAT_MODRM"
			continue
		}
		if (!imm_flag[op])
			semantic_error("Unknown imm opnd: " op)
		if (!immflags) {
			# first immediate operand
			immflags = imm_flag[op]
			continue
		}
		# a second immediate is accepted only when it is "Ib"
		if (op != "Ib")
			semantic_error("Second IMM error")
		immflags = add_flags(immflags, "INAT_SCNDIMM")
	}
	return add_flags(immflags, modrm)
}
266
# Opcode line: "<hex>: <opcode> [operands] [(ext)] | <opcode> ...".
# Every opcode on the line contributes flags to slot <hex> of the base
# table and/or of the last-prefix variant tables.
/^[0-9a-f]+:/ {
	# a match on the very first input record is ignored
	if (NR == 1)
		next

	# slot index: the hex digits in front of the colon
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# "<hex>: escape # <name>" registers a new escape id under <name>
	# (all remaining fields joined together)
	if ($2 == "escape") {
		if ($3 != "#")
			semantic_error("No escaped name")
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# walk the '|'-separated opcodes of this line
	i = 2
	while (i <= NF) {
		opcode = $i
		i++
		delete opnds
		ext = null
		flags = null
		opnd = null

		# optional operand list, comma separated
		if ($i ~ opnd_expr) {
			opnd = $i
			count = split(opnd, opnds, ",")
			i++
			flags = convert_operands(count, opnds)
		}
		# optional "(...)" extension
		if ($i ~ ext_expr) {
			ext = $i
			i++
		}
		# what follows must be the separator, unless it is the last field
		if ($i ~ sep_expr)
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# group opcode: allocate a group id on first use
		if (opcode ~ group_expr) {
			if (!(opcode in group))
				group[opcode] = ggid++
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}

		# force (or default) 64bit
		if (ext ~ force64_expr)
			flags = add_flags(flags, "INAT_FORCE64")

		# invalid in 64-bit, unless the line also carries an (o64) entry
		if ((ext ~ invalid64_expr) && ($0 !~ only64_expr))
			flags = add_flags(flags, "INAT_INV64")

		# REX2 not allowed
		if (ext ~ no_rex2_expr)
			flags = add_flags(flags, "INAT_NO_REX2")

		# REX prefix
		if (opcode ~ rex_expr)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# coprocessor escape : TODO
		if (opcode ~ fpu_expr)
			flags = add_flags(flags, "INAT_MODRM")

		# VEX/EVEX classes; the first matching one wins
		if (ext ~ evexonly_expr)
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (ext ~ evex_scalable_expr)
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY | INAT_EVEX_SCALABLE")
		else if (ext ~ vexonly_expr)
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if ((ext ~ vexok_expr) || (opcode ~ vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# legacy prefixes
		if (ext ~ prefix_expr) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}

		# nothing to record for this opcode
		if (flags == "")
			continue

		# store the flags in each last-prefix variant table the
		# extension selects
		if (ext ~ lprefix1_expr) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (ext ~ lprefix2_expr) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (ext ~ lprefix3_expr) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (ext ~ rex2_expr)
			table[idx] = add_flags(table[idx], "INAT_REX2_VARIANT")
		# without a (66)/(F2)/(F3) marker the flags go to the base table
		if (ext !~ lprefix_expr)
			table[idx] = add_flags(table[idx],flags)
	}
	# tell the base slot that variant tables exist for it
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
381
# Print one "[id][lp] = name," initializer for every table recorded in tbl.
#   tbl:   table names indexed by [id, last-prefix index]
#   limit: ids 0 .. limit-1 are scanned
function emit_index_initializers(tbl, limit,   i, j)
{
	for (i = 0; i < limit; i++)
		for (j = 0; j < max_lprefix; j++)
			if (tbl[i,j])
				print "	["i"]["j"] = "tbl[i,j]","
}

# Print one "<prefix>id][lp] = name;" assignment for every table recorded
# in tbl.
#   tbl:    table names indexed by [id, last-prefix index]
#   prefix: start of the statement, up to and including the first '['
#   limit:  ids 0 .. limit-1 are scanned
function emit_index_assignments(tbl, prefix, limit,   i, j)
{
	for (i = 0; i < limit; i++)
		for (j = 0; j < max_lprefix; j++)
			if (tbl[i,j])
				print prefix i"]["j"] = "tbl[i,j]";"
}

END {
	# an error was recorded in awkchecked earlier: emit nothing more
	if (awkchecked != "")
		exit 1

	print "#ifndef __BOOT_COMPRESSED\n"

	# Constant lookup arrays with designated initializers
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	emit_index_initializers(etable, geid)
	print "};\n"

	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	emit_index_initializers(gtable, ggid)
	print "};\n"

	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	emit_index_initializers(atable, gaid)
	print "};\n"

	print "#else /* !__BOOT_COMPRESSED */\n"

	# Same arrays left uninitialized, filled in by inat_init_tables()
	print "/* Escape opcode map array */"
	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* Group opcode map array */"
	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* AVX opcode map array */"
	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "static void inat_init_tables(void)"
	print "{"

	print "\t/* Print Escape opcode map array */"
	emit_index_assignments(etable, "\tinat_escape_tables[", geid)
	print ""

	print "\t/* Print Group opcode map array */"
	emit_index_assignments(gtable, "\tinat_group_tables[", ggid)
	print ""

	print "\t/* Print AVX opcode map array */"
	emit_index_assignments(atable, "\tinat_avx_tables[", gaid)

	print "}"
	print "#endif"
}
461
462