xref: /linux/arch/x86/tools/gen-insn-attr-x86.awk (revision 6116075e18f79698419f2606d9cb34d23198f7e3)
#!/bin/awk -f
# SPDX-License-Identifier: GPL-2.0
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
7
# Awk implementation sanity check
#
# Checks that sprintf("%x", 0) yields "0".
# Returns "" when the check passes, otherwise an error message.
function check_awk_implement(    problem) {
	problem = ""
	if (sprintf("%x", 0) != "0")
		problem = "Your awk has a printf-format problem."
	return problem
}
14
# Clear working vars
#
# Resets the per-table state: the table being built, its three last-prefix
# variant tables, the current escape/group/AVX ids (-1 means "not set") and
# the current table name.
function clear_vars() {
	tname = ""
	aid = -1 # AVX id
	gid = -1 # group id
	eid = -1 # escape id
	delete lptable3
	delete lptable2
	delete lptable1
	delete table
}
26
BEGIN {
	# Refuse to run when the awk implementation fails the sanity check.
	# awkchecked is kept so that END can tell the check failed.
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		printf "Error: %s\n", awkchecked > "/dev/stderr"
		printf "Please try to use gawk.\n" > "/dev/stderr"
		exit 1
	}

	# Header of the generated file
	printf "/* x86 opcode map generated from x86-opcode-map.txt */\n"
	printf "/* Do not change this code. */\n\n"

	# Global id counters and the name maps filled in at each EndTable.
	# geid/ggid are the next escape/group ids to hand out; gaid ends up
	# one above the highest AVX table id seen.
	geid = 1
	ggid = 1
	gaid = 0
	delete atable
	delete gtable
	delete etable

	# Token classification for the fields of an opcode line
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate operands and the attribute each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = imm_flag["Jb"] = imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = imm_flag["Ov"] = "INAT_MOFFSET"

	# Operand / superscript patterns that translate to attribute flags
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
	rex2_expr = "\\(REX2\\)"
	no_rex2_expr = "\\(!REX2\\)"
	fpu_expr = "^ESC" # TODO

	# Last-prefix superscripts; max_lprefix is the number of per-opcode
	# tables (base table plus three last-prefix variants)
	max_lprefix = 4
	lprefix_expr = "\\((66|F2|F3)\\)"
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accept the VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript support *only* the VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with (ev) superscript support *only* the EVEX prefix
	evexonly_expr = "\\(ev\\)"
	# (es) is the same as (ev) but also "SCALABLE" i.e. W and pp determine operand size
	evex_scalable_expr = "\\(es\\)"

	# Prefix opcodes, marked with the (Prefix) superscript
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"
	prefix_num["REX2"] = "INAT_PFX_REX2"

	clear_vars()
}
110
# Report a semantic error in the input, tagged with the current record
# number, on stderr and terminate with exit status 1.
function semantic_error(msg,    where) {
	where = "Semantic error at " NR
	print where ": " msg > "/dev/stderr"
	exit 1
}
115
# Print a debugging message.
# The message goes to stderr, like semantic_error() output: per the usage
# line stdout is redirected into the generated C file, so diagnostics
# written there would end up inside the generated source.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
119
# Return the number of elements stored in array arr (0 when it is empty).
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
126
# "Table:" line: echo it as a C comment, then reject it if a table name is
# still set, i.e. the previous table was not closed by EndTable.
/^Table:/ {
	printf "/* %s */\n", $0
	if (length(tname) > 0)
		semantic_error("Hit Table: before EndTable:.")
}
132
# "Referrer: <escape name>" selects the escape id of the table being defined
# and names the table after it.  A bare "Referrer:" line changes nothing.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table: join the name fields without blanks,
		# the same way the "escape" opcode line recorded the name
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# Reading escape[ref] for an unknown name would silently create
		# an empty entry and produce a bogus table id, so reject it here.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
143
# "AVXcode: [<id>]" line.  With an id, remember it as the current AVX id,
# keep gaid one above the highest id seen, and name the table after the id
# unless it already has a name.  When neither an AVX id nor an escape id is
# set afterwards, this is the primary opcode table.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		if (aid >= gaid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", aid)
	}
	if (eid == -1 && aid == -1)	# primary opcode table
		tname = "inat_primary_table"
}
156
# "GrpTable: <GrpN>" line: echo it as a C comment and switch to the group
# table of an already registered group; an unknown group is an error.
/^GrpTable:/ {
	printf "/* %s */\n", $0
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else {
		semantic_error("No group: " $2 )
	}
}
164
# Print one attribute table as a C array definition.
#
#   tbl   table indexed by formatted opcode strings (e.g. "0x0f")
#   name  C declarator, including the array size (e.g. "foo[SIZE]")
#   fmt   sprintf format that turns a slot number into a tbl index
#   n     number of slots to visit (0 .. n-1)
#
# Only slots with a non-empty value are printed, as designated initializers.
# i and id are declared as extra parameters so that they are local: without
# that they are globals and would clobber the "i" cursor used by the
# pattern rules.
function print_table(tbl,name,fmt,n,    i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
175
# Print one finished table and record its C symbol name.
#
#   tbl  the base table (lp == 0) or a last-prefix variant table (lp 1..3)
#   lp   last-prefix slot; variants get the symbol suffix "_" lp
#
# Nothing is emitted for an empty table.  Group tables (gid != -1) are
# recorded in gtable; all other tables are recorded in etable and, when an
# AVX id is set (aid >= 0), in atable as well.
function emit_table(tbl, lp,    sym)
{
	if (array_size(tbl) == 0)
		return

	sym = (lp == 0) ? tname : (tname "_" lp)
	if (gid != -1) {
		# group tables: slots 0x0 .. 0x7
		print_table(tbl, sym "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
		gtable[gid,lp] = sym
		return
	}
	# primary/escaped tables: slots 0x00 .. 0xff
	print_table(tbl, sym "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	etable[eid,lp] = sym
	if (aid >= 0)
		atable[aid,lp] = sym
}

# "EndTable" closes the current table: print the base table and its three
# last-prefix variants (in that order), then reset the per-table state.
/^EndTable/ {
	emit_table(table, 0)
	emit_table(lptable1, 1)
	emit_table(lptable2, 2)
	emit_table(lptable3, 3)
	print ""
	clear_vars()
}
233
# Combine two flag expressions into one C expression joined by " | ".
# When either side is empty (false), the other side is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
242
# convert operands to flags.
#
#   count  number of operands in opnd
#   opnd   operands, indexed 1..count
#
# Returns the immediate flag (with INAT_SCNDIMM added when a second
# immediate follows) combined with INAT_MODRM when any operand matches
# modrm_expr.  An immediate without an imm_flag entry, or a second immediate
# other than "Ib", is a semantic error.
function convert_operands(count,opnd,       k,operand,imm,modrm)
{
	imm = ""
	modrm = ""
	for (k = 1; k <= count; k++) {
		operand = opnd[k]
		if (match(operand, imm_expr) != 1) {
			# not an immediate: it can only contribute ModRM
			if (match(operand, modrm_expr))
				modrm = "INAT_MODRM"
			continue
		}
		if (!imm_flag[operand])
			semantic_error("Unknown imm opnd: " operand)
		if (!imm) {
			# first immediate operand
			imm = imm_flag[operand]
			continue
		}
		# an immediate was already seen: only a trailing Ib is allowed
		if (operand != "Ib")
			semantic_error("Second IMM error")
		imm = add_flags(imm, "INAT_SCNDIMM")
	}
	return add_flags(imm, modrm)
}
264
# Opcode line: "<hex index>: <opcode> [operands] [(ext)] [| <opcode> ...]".
# An "escape" line registers a new escape id.  Any other line is split into
# "|"-separated variants; each variant is converted into attribute flags,
# which are merged into table[idx] and/or the last-prefix variant tables.
/^[0-9a-f]+:/ {
	# the very first input record is never treated as an opcode line
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after "#", joined without blanks
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		# hand out the next escape id
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	# "null" is not assigned anywhere in this script, so these
	# assignments reset the variables to awk's empty value
	variant = null
	# converts
	# i is the field cursor; each pass consumes one variant
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		# optional comma-separated operand list
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		# optional extension field starting with "("; only one field
		# is consumed
		if (match($i, ext_expr))
			ext = $(i++)
		# a "|" must follow, except on the last field of the line
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first use of a group name registers a new group id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX2 not allowed
		if (match(ext, no_rex2_expr))
			flags = add_flags(flags, "INAT_NO_REX2")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		# the first matching superscript wins: (ev), (es), (v), then
		# (v1) or an opcode name starting with 'v'/'k'
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, evex_scalable_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY | INAT_EVEX_SCALABLE")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# a variant without any attribute leaves no trace in the tables
		if (length(flags) == 0)
			continue
		# check if last prefix
		# a variant may match several of the lprefix patterns and is then
		# merged into each matching variant table
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		# a (REX2) variant only marks the base entry
		if (match(ext, rex2_expr))
			table[idx] = add_flags(table[idx], "INAT_REX2_VARIANT")
		# variants whose ext has no literal (66), (F2) or (F3) marker
		# are merged into the base table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when any last-prefix variant exists for idx
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
374
# Print one "\t<lhs>[i][j] = <table name><term>" line for every non-empty
# map[i,j], with i in [0, limit) and j in [0, max_lprefix).
# lhs is "" for designated initializers or an array name for assignments.
function print_map_entries(map, limit, lhs, term,    i, j)
{
	for (i = 0; i < limit; i++)
		for (j = 0; j < max_lprefix; j++)
			if (map[i,j])
				print "\t" lhs "[" i "][" j "] = " map[i,j] term
}

# Emit the escape/group/AVX lookup arrays that tie the printed tables
# together: as const initializers by default, or as plain arrays filled in
# by inat_init_tables() when __BOOT_COMPRESSED is defined.
END {
	# "exit" in BEGIN still runs END: print nothing after a failed awk
	# sanity check and keep the failing exit status
	if (awkchecked != "")
		exit 1

	print "#ifndef __BOOT_COMPRESSED\n"

	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_entries(etable, geid, "", ",")
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_entries(gtable, ggid, "", ",")
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_entries(atable, gaid, "", ",")
	print "};\n"

	print "#else /* !__BOOT_COMPRESSED */\n"

	print "/* Escape opcode map array */"
	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* Group opcode map array */"
	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* AVX opcode map array */"
	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "static void inat_init_tables(void)"
	print "{"

	# print escape opcode map's array
	print "\t/* Print Escape opcode map array */"
	print_map_entries(etable, geid, "inat_escape_tables", ";")
	print ""

	# print group opcode map's array
	print "\t/* Print Group opcode map array */"
	print_map_entries(gtable, ggid, "inat_group_tables", ";")
	print ""
	# print AVX opcode map's array
	print "\t/* Print AVX opcode map array */"
	print_map_entries(atable, gaid, "inat_avx_tables", ";")

	print "}"
	print "#endif"
}
454
455