xref: /linux/scripts/verify_builtin_ranges.awk (revision 5afca7e996c42aed1b4a42d4712817601ba42aff)
1#!/usr/bin/gawk -f
2# SPDX-License-Identifier: GPL-2.0
3# verify_builtin_ranges.awk: Verify address range data for builtin modules
4# Written by Kris Van Hees <kris.van.hees@oracle.com>
5#
6# Usage: verify_builtin_ranges.awk modules.builtin.ranges System.map \
7#				   modules.builtin vmlinux.map vmlinux.o.map
8#
9
10# Return the module name(s) (if any) associated with the given object.
11#
12# If we have seen this object before, return information from the cache.
13# Otherwise, retrieve it from the corresponding .cmd file.
14#
function get_module_info(fn, mod, obj, s) {
	# Serve repeated lookups of the same object from the cache.
	if (fn in omod)
		return omod[fn];

	# Without a "/<name>" tail there is no .cmd path to derive.
	if (!match(fn, /\/[^/]+$/))
		return "";

	obj = fn;
	mod = "";

	# <dir>/<name> -> <dir>/.<name>.cmd
	fn = substr(fn, 1, RSTART) "." substr(fn, RSTART + 1) ".cmd";

	# Only the first line of the .cmd file is consulted.  Failing to read
	# it is fatal; total is cleared so that the END rule stays silent.
	if ((getline s <fn) != 1) {
		print "ERROR: Failed to read: " fn "\n\n" \
		      "  For kernels built with O=<objdir>, cd to <objdir>\n" \
		      "  and execute this script as ./source/scripts/..." \
		      >"/dev/stderr";
		close(fn);
		total = 0;
		exit(1);
	}
	close(fn);

	# Pull the value of -DKBUILD_MODFILE=... (quotes removed) or, failing
	# that, of RUST_MODFILE=... out of the command line.  The constants 16
	# and 13 are the lengths of those two prefixes.
	if (match(s, /DKBUILD_MODFILE=['"]+[^'"]+/)) {
		mod = substr(s, RSTART + 16, RLENGTH - 16);
		gsub(/['"]/, "", mod);
	} else if (match(s, /RUST_MODFILE=[^ ]+/))
		mod = substr(s, RSTART + 13, RLENGTH - 13);

	# A value without blanks names a single module file.  Objects that are
	# not part of any module carry such a value too, so it is only accepted
	# when it is a known built-in module (i.e. present in mods).
	if (mod !~ / / && !(mod in mods))
		mod = "";

	# Reduce each path to its last component and use "_" instead of "-".
	gsub(/([^/ ]*\/)+/, "", mod);
	gsub(/-/, "_", mod);

	# mod is now "", one validated module name, or a blank-separated list
	# of module names (lists are not validated).  Remember it for next time.
	omod[obj] = mod;

	return mod;
}
60
61# Return a representative integer value for a given hexadecimal address.
62#
63# Since all kernel addresses fall within the same memory region, we can safely
# strip off the first 4 hex digits before performing the hex-to-dec conversion,
65# thereby avoiding integer overflows.
66#
function addr2val(val) {
	# Accept the address with or without a leading "0x".
	sub(/^0x/, "", val);

	# A full 16-digit address loses its 4 leading digits; anything else is
	# converted as is.
	return strtonum("0x" (length(val) == 16 ? substr(val, 5) : val));
}
73
# Verify that all five input files were given on the command line.
75#
BEGIN {
	# ARGV[0] plus the five input files make six arguments.
	if (ARGC < 6) {
		print "Syntax: verify_builtin_ranges.awk <ranges-file> <system-map>\n" \
		      "          <builtin-file> <vmlinux-map> <vmlinux-o-map>\n" \
		      >"/dev/stderr";

		# exit still runs the END rule; total == 0 keeps it quiet.
		total = 0;
		exit(1);
	}
}
85
86# (1) Load the built-in module address range data.
87#
ARGIND == 1 {
	# Keep each record verbatim under its line number and count them.
	ranges[FNR] = $0;
	rcnt += 1;

	next;
}
93
94# (2) Annotate System.map symbols with module names.
95#
ARGIND == 2 {
	# Save what we need from the System.map record up front: $0 is
	# overwritten with range records further down.
	addr = addr2val($1);
	name = $3;

	# Consume range records until the current module range ends beyond
	# this symbol, or until the records run out.
	while (addr >= mod_eaddr) {
		if (sect_symb) {
			# A section record is pending: nothing can be resolved
			# until its anchor symbol shows up in System.map.
			if (name != sect_symb)
				next;

			sect_base = addr - sect_off;
			if (dbg)
				printf "[%s] BASE (%s) %016x - %016x = %016x\n", sect_name, sect_symb, addr, sect_off, sect_base >"/dev/stderr";
			sect_symb = 0;
		}

		ridx++;
		if (ridx > rcnt)
			break;

		# Split the next range record into fields, with the first "-"
		# turned into a field separator.
		$0 = ranges[ridx];
		sub(/-/, " ");

		if ($4 == "=") {
			# Section record: name, offset and anchor symbol.
			sect_name = $1;
			sect_off = strtonum("0x" $2);
			sect_symb = $5;
			continue;
		}

		# Module record: start and end offsets (relative to the section
		# base) followed by the module name(s).
		# NOTE(review): a second "-" is replaced here as well; the
		# reason is not evident from this file.
		sub(/-/, " ");
		mod_saddr = strtonum("0x" $2) + sect_base;
		mod_eaddr = strtonum("0x" $3) + sect_base;

		# Blank the first three fields; what is left of the record is
		# the module name list.
		$1 = $2 = $3 = "";
		sub(/^ +/, "");
		mod_name = $0;

		if (dbg)
			printf "[%s] %s from %016x to %016x\n", sect_name, mod_name, mod_saddr, mod_eaddr >"/dev/stderr";
	}

	# Symbols inside [mod_saddr, mod_eaddr) belong to the current module(s).
	idx = addr"-"name;
	if (mod_saddr <= addr && addr < mod_eaddr)
		sym2mod[idx] = mod_name;

	next;
}
139
140# Once we are done annotating the System.map, we no longer need the ranges data.
141#
ARGIND == 3 && FNR == 1 {
	# First record of the third input file: the range records have all
	# been consumed, so release them.
	delete ranges;
}
145
146# (3) Build a lookup map of built-in module names.
147#
148# Lines from modules.builtin will be like:
149#	kernel/crypto/lzo-rle.ko
150# and we record the object name "crypto/lzo-rle".
151#
ARGIND == 3 {
	# Drop the (first) "kernel/" and the trailing ".ko" from the record.
	sub(/kernel\//, "");
	sub(/\.ko$/, "");

	# Record what is left as a known built-in module.
	mods[$1] = 1;

	next;
}
159
160# (4) Get a list of symbols (per object).
161#
162# Symbols by object are read from vmlinux.map, with fallback to vmlinux.o.map
# if vmlinux is found to have linked in vmlinux.o.
164#
165
166# If we were able to get the data we need from vmlinux.map, there is no need to
167# process vmlinux.o.map.
168#
ARGIND == 5 && FNR == 1 && total > 0 {
	# Symbols were already evaluated from the fourth input file, so stop
	# reading here and go straight to the END rule.
	if (dbg)
		printf "Note: %s is not needed.\n", FILENAME >"/dev/stderr";

	exit;
}
174
175# First determine whether we are dealing with a GNU ld or LLVM lld linker map.
176#
ARGIND >= 4 && FNR == 1 && NF == 7 && $1 == "VMA" && $7 == "Symbol" {
	# A 7-column header running from "VMA" to "Symbol" on the first line
	# is taken as the mark of an lld map.
	map_is_lld = 1;

	next;
}
181
# (LLD) Convert a section record from lld format to ld format.
183#
ARGIND >= 4 && map_is_lld && NF == 5 && /[0-9] [^ ]+$/ {
	# Rebuild the record as:
	#   <$5> 0x<$1> 0x<$3> load address 0x<$2>
	# No "next": the rewritten record is handed on to the rules below.
	$0 = $5 " 0x"$1 " 0x"$3 " load address 0x"$2;
}
187
188# (LLD) Convert an object record from lld format to ld format.
189#
ARGIND >= 4 && map_is_lld && NF == 5 && $5 ~ /:\(/ {
	# Archive members are skipped, unless the archive is vmlinux.a.
	if (/\.a\(/ && !/ vmlinux\.a\(/)
		next;

	# Remove every ")", turn ":(" into a blank, and drop a " vmlinux.a("
	# wrapper, so that what followed ":(" becomes a sixth field.
	gsub(/\)/, "");
	sub(/:\(/, " ");
	sub(/ vmlinux\.a\(/, " ");

	# Rebuild the record as:  <$6> 0x<$1> 0x<$3> <$5>  (leading blank).
	$0 = " "$6 " 0x"$1 " 0x"$3 " " $5;
}
199
200# (LLD) Convert a symbol record from lld format to ld format.
201#
ARGIND >= 4 && map_is_lld && NF == 5 && $5 ~ /^[A-Za-z_][A-Za-z0-9_]*$/ {
	# Fifth column is a plain identifier: keep only the address (first
	# column) and the name, behind two leading blanks.
	$0 = "  0x" $1 " " $5;
}
205
# (LLD) We do not need any other lld linker map records.
207#
ARGIND >= 4 && map_is_lld && /^[0-9a-f]{16} / {
	# Still starts with a bare 16-digit hex address, i.e. none of the
	# conversions above rewrote it: drop the record.
	next;
}
211
212# Handle section records with long section names (spilling onto a 2nd line).
213#
ARGIND >= 4 && !map_is_lld && NF == 1 && /^[^ ]/ {
	# A single word starting in column 0: glue the following line onto it
	# (separated by one blank) so both are processed as one record.
	s = $0;
	getline;
	$0 = s " " $0;
}
219
220# Next section - previous one is done.
221#
ARGIND >= 4 && /^[^ ]/ {
	# Any record that starts in column 0 closes the current section.
	sect = 0;
}
225
226# Get the (top level) section name.
227#
ARGIND >= 4 && /^\./ {
	# Sections that are explicitly of no interest here.
	if ($1 ~ /^\.orc_/ || $1 ~ /_sites$/ || $1 ~ /\.percpu/)
		next;

	# In the fourth input file (vmlinux.map), a section whose address is
	# all zeroes is skipped too.
	if (ARGIND == 4 && $2 ~ /^0x0+$/)
		next;

	# This is the section that subsequent records belong to.
	sect = $1;

	next;
}
241
242# If we are not currently in a section we care about, ignore records.
243#
!sect {
	# No section of interest is open: nothing below applies.
	next;
}
247
248# Handle object records with long section names (spilling onto a 2nd line).
249#
ARGIND >= 4 && /^ [^ \*]/ && NF == 1 {
	# Only the (long) section name fits on this line; the rest of the
	# entry sits on the next line.  Merge the two, separated by one blank.
	s = $0;
	getline;
	$0 = s " " $0;
}
257
258# Objects linked in from static libraries are ignored.
259# If the object is vmlinux.o, we need to consult vmlinux.o.map for per-object
260# symbol information
261#
ARGIND == 4 && /^ [^ ]/ && NF == 4 {
	# Members of static libraries are of no interest.
	if ($4 ~ /\.a\(/)
		next;

	# Remember the address of the first object record seen for each
	# <top-level section>:<object section> pair.
	idx = sect":"$1;
	if (!(idx in sect_addend)) {
		sect_addend[idx] = addr2val($2);
		if (dbg)
			printf "ADDEND %s = %016x\n", idx, sect_addend[idx] >"/dev/stderr";
	}

	# vmlinux.o as the object means that the per-object details have to
	# come from vmlinux.o.map instead.
	if ($4 == "vmlinux.o") {
		need_o_map = 1;
		next;
	}
}
277
278# If data from vmlinux.o.map is needed, we only process section and object
279# records from vmlinux.map to determine which section we need to pay attention
280# to in vmlinux.o.map.  So skip everything else from vmlinux.map.
281#
ARGIND == 4 && need_o_map {
	# Once vmlinux.o.map is known to be needed, the remaining rules are
	# bypassed for records of the fourth input file.
	next;
}
285
286# Get module information for the current object.
287#
ARGIND >= 4 && /^ [^ ]/ && NF == 4 {
	# Object record fields: section, address, size, object file.
	msect = $1;
	mod_name = get_module_info($4);

	# First address past the end of this object.
	mod_eaddr = addr2val($2) + addr2val($3);

	next;
}
295
296# Process a symbol record.
297#
298# Evaluate the module information obtained from vmlinux.map (or vmlinux.o.map)
299# as follows:
300#  - For all symbols in a given object:
301#     - If the symbol is annotated with the same module name(s) that the object
302#       belongs to, count it as a match.
303#     - Otherwise:
304#        - If the symbol is known to have duplicates of which at least one is
305#          in a built-in module, disregard it.
#        - If the symbol is not annotated with any module name(s) AND the
307#          object belongs to built-in modules, count it as missing.
308#        - Otherwise, count it as a mismatch.
309#
ARGIND >= 4 && /^ / && NF == 2 && $1 ~ /^0x/ {
	# Only section pairs registered in sect_addend (by the vmlinux.map
	# object-record rule) are evaluated.
	idx = sect":"msect;
	if (!(idx in sect_addend))
		next;

	addr = addr2val($1);

	# A zero-size symbol may sit on the byte right after the object's
	# range.  The map lists it under the current object, but that address
	# could equally be the start of another built-in module, so such a
	# symbol is left out of the evaluation altogether.
	if (mod_name && addr == mod_eaddr)
		next;

	# Records of the fifth input file (vmlinux.o.map) carry addresses
	# relative to the section; add the section's base address.
	if (ARGIND == 5)
		addr += sect_addend[idx];

	# Look the symbol up under the same "<address>-<name>" key that was
	# used while annotating System.map.
	idx = addr"-"$2;
	mod = "";
	if (!(idx in sym2mod)) {
		# No module annotation for this symbol.
		if (mod_name != "") {
			print $2 " should be in " mod_name;
			missing++;
		} else
			matches++;
	} else {
		mod = sym2mod[idx];
		if (mod == mod_name) {
			mod_matches++;
			matches++;
		} else {
			# Annotated, but not with what the object says.
			if (mod_name == "")
				print $2 " in " mod " (should NOT be)";
			else
				print $2 " in " mod " (should be " mod_name ")";
			mismatches++;
		}
	}

	total++;

	next;
}
356
357# Issue the comparison report.
358#
END {
	# total is zeroed on the error paths before exit(1), so the report is
	# only produced when at least one symbol was evaluated.
	if (total) {
		printf "Verification of %s:\n", ARGV[1];
		printf "  Correct matches:  %6d (%d%% of total)\n", matches, 100 * matches / total;

		# matches can be 0 even though total is not (every symbol was a
		# mismatch or missing).  Dividing by it would abort gawk with a
		# fatal error before the report is complete, so report 0% instead.
		printf "    Module matches: %6d (%d%% of matches)\n", mod_matches, (matches ? 100 * mod_matches / matches : 0);

		printf "  Mismatches:       %6d (%d%% of total)\n", mismatches, 100 * mismatches / total;
		printf "  Missing:          %6d (%d%% of total)\n", missing, 100 * missing / total;

		# Any discrepancy makes the verification fail.
		if (mismatches || missing)
			exit(1);
	}
}
371