xref: /freebsd/tools/pkgbase/metalog_reader.lua (revision d91f8db5f1822c43cd256f19aae1d059e4b25a26)
1#!/usr/libexec/flua
2
3-- SPDX-License-Identifier: BSD-2-Clause
4--
5-- Copyright(c) 2020 The FreeBSD Foundation.
6--
7-- Redistribution and use in source and binary forms, with or without
8-- modification, are permitted provided that the following conditions
9-- are met:
10-- 1. Redistributions of source code must retain the above copyright
11--    notice, this list of conditions and the following disclaimer.
12-- 2. Redistributions in binary form must reproduce the above copyright
13--    notice, this list of conditions and the following disclaimer in the
14--    documentation and/or other materials provided with the distribution.
15--
16-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19-- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26-- SUCH DAMAGE.
27
28-- $FreeBSD$
29
--- Script entry point: parses the command line, loads the METALOG file and
--- prints the requested report(s).
--- Exits through usage() on -h, on an unknown option, or when no file name
--- was given.
--- @param args table command-line arguments (the global `arg` table)
--- @return number|nil 1 when the lint pass reported errors, nothing otherwise
function main(args)
	if #args == 0 then usage() end
	local filename
	local printall, checkonly, pkgonly = false, false, false
	local dcount, dsize, fuid, fgid, fid =
	    false, false, false, false, false
	local verbose = false
	local w_notagdirs = false

	local i = 1
	while i <= #args do
		if args[i] == '-h' then
			usage(true)
		elseif args[i] == '-a' then
			printall = true
		elseif args[i] == '-c' then
			printall = false
			checkonly = true
		elseif args[i] == '-p' then
			printall = false
			pkgonly = true
			-- consume the sub-options that may directly follow -p
			while i < #args do
				i = i+1
				if args[i] == '-count' then
					dcount = true
				elseif args[i] == '-size' then
					dsize = true
				elseif args[i] == '-fsetuid' then
					fuid = true
				elseif args[i] == '-fsetgid' then
					fgid = true
				elseif args[i] == '-fsetid' then
					fid = true
				else
					-- not a -p sub-option: hand it back to the outer loop
					i = i-1
					break
				end
			end
		elseif args[i] == '-v' then
			verbose = true
		elseif args[i] == '-Wcheck-notagdir' then
			w_notagdirs = true
		elseif args[i]:match('^%-') then
			io.stderr:write('Unknown argument '..args[i]..'.\n')
			usage()
		else
			filename = args[i]
		end
		i = i+1
	end

	if filename == nil then
		io.stderr:write('Missing filename.\n')
		usage()
	end

	-- -a is the default mode whenever neither -c nor -p was requested, even
	-- if modifiers such as -v or -Wcheck-notagdir are present.
	if not checkonly and not pkgonly then
		printall = true
	end

	local sess = Analysis_session(filename, verbose, w_notagdirs)

	local errors
	if printall then
		io.write('--- PACKAGE REPORTS ---\n')
		io.write(sess.pkg_report_full())
		io.write('--- LINTING REPORTS ---\n')
		errors = print_lints(sess)
	elseif checkonly then
		errors = print_lints(sess)
	else
		-- pkgonly: unset filters leave nil holes in the table, which is fine
		-- because pkg_report_simple walks it with pairs()
		io.write(sess.pkg_report_simple(dcount, dsize, {
			fuid and sess.pkg_issetuid or nil,
			fgid and sess.pkg_issetgid or nil,
			fid and sess.pkg_issetid or nil
		}))
	end

	if errors then
		return 1
	end
end
112
--- Prints the usage synopsis and terminates the process.
--- With a true `man` the synopsis plus the full help text go to stdout and
--- the script exits via os.exit(); otherwise only the synopsis is written
--- to stderr and the script exits with status 1.
--- @param man boolean
function usage(man)
	local synopsis = 'Usage: '..arg[0].. ' [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] metalog-path \n'
	if not man then
		io.stderr:write(synopsis)
		os.exit(1)
	end
	io.write('\n', synopsis, [[

The script reads METALOG file created by pkgbase (make packages) and generates
reports about the installed system and issues.  It accepts an mtree file in a
format that's returned by `mtree -c | mtree -C`

  Options:
  -a         prints all scan results. this is the default option if no option
             is provided.
  -c         lints the file and gives warnings/errors, including duplication
             and conflicting metadata
      -Wcheck-notagdir    entries with dir type and no tags will be also
                          included the first time they appear
  -p         list all package names found in the file as exactly specified by
             `tags=package=...`
      -count       display the number of files of the package
      -size        display the size of the package
      -fsetgid     only include packages with setgid files
      -fsetuid     only include packages with setuid files
      -fsetid      only include packages with setgid or setuid files
  -v          verbose mode
  -h          help page

]])
	os.exit()
end
150
--- Writes the duplicate-entry and inode lint reports of a session to stdout,
--- warnings before errors for each report.
--- @param sess Analysis_session
--- @return boolean true when either report produced error text
function print_lints(sess)
	local dup_warnings, dup_errors = sess.dup_report()
	io.write(dup_warnings, dup_errors)

	local inode_warnings, inode_errors = sess.inode_report()
	io.write(inode_warnings, inode_errors)

	if #dup_errors > 0 then
		return true
	end
	return #inode_errors > 0
end
161
--- Iterator factory that visits the entries of `t` in ascending key order
--- (as ordered by table.sort). The keys are snapshotted when the iterator
--- is created; values are looked up when each key is visited.
--- @param t table
function sortedPairs(t)
	local keys = {}
	local key = next(t)
	while key ~= nil do
		keys[#keys + 1] = key
		key = next(t, key)
	end
	table.sort(keys)

	local pos = 0
	return function()
		pos = pos + 1
		local current = keys[pos]
		return current, t[current]
	end
end
173
--- Builds a new table holding, under every key of `t`, the result of
--- applying `f` to the value stored there. `t` itself is left untouched.
--- @param t table <T, U>
--- @param f function <U -> U>
function table_map(t, f)
	local mapped = {}
	for key, value in pairs(t) do
		mapped[key] = f(value)
	end
	return mapped
end
181
--- @class MetalogRow
-- a table containing a file's info, parsed from one line of a METALOG file
-- all attribute values in the table are strings
-- sample output:
--	{
--		filename = ./usr/share/man/man3/inet6_rthdr_segments.3.gz
--		linenum = 5
--		attrs = {
--			gname = 'wheel'
--			uname = 'root'
--			mode = '0444'
--			size = '1166'
--			time = nil
--			type = 'file'
--			tags = 'package=clibs,debug'
--		}
--	}
--- @param line string one METALOG line: a path followed by key=value words
--- @param lineno number line number of `line`, stored as `linenum`
--- @return MetalogRow
function MetalogRow(line, lineno)
	local attrs = {}
	local filename, rest = line:match('^(%S+) (.+)$')
	if filename == nil then
		-- no attribute list after the path: keep the bare path and an
		-- empty attribute set instead of failing on a nil `rest`
		filename, rest = line:match('^(%S+)'), ''
	end
	-- mtree file has space escaped as '\\040', not affecting splitting
	-- string by space
	for attrpair in rest:gmatch('[^ ]+') do
		local k, v = attrpair:match('^(.-)=(.+)')
		-- words that are not of the form key=value yield no key; skip them
		-- rather than indexing the table with nil
		if k ~= nil then
			attrs[k] = v
		end
	end
	return {
		filename = filename,
		linenum = lineno,
		attrs = attrs,
	}
end
214
-- tells whether every MetalogRow in an array is equivalent to the first one.
-- an attribute that the first row lacks is not treated as a difference, and
-- linenum is never compared since it is not relevant to equivalence.
-- on a mismatch the second return value names the first differing field
--- @param rows MetalogRow[]
--- @param ignore_name boolean do not compare filenames
--- @param ignore_tags boolean do not compare the 'tags' attribute
function metalogrows_all_equal(rows, ignore_name, ignore_tags)
	local reference = rows[1]
	for _, row in ipairs(rows) do
		if not ignore_name and row.filename ~= reference.filename then
			return false, 'filename'
		end
		for key, value in pairs(row.attrs) do
			if not (ignore_tags and key == 'tags') then
				local expected = reference.attrs[key]
				if value ~= expected and expected ~= nil then
					return false, key
				end
			end
		end
	end
	return true
end
241
--- Derives a package name from the value of a `tags=` attribute.
--- The comma-separated segments are combined as
--- `<package>[-<other segments>][-<development|profile|debug|docs>]`;
--- when `package=` or one of the four suffix words occurs more than once,
--- the last occurrence wins.
--- @param tagstr string e.g. 'package=clibs,debug'
--- @return string e.g. 'clibs-debug'
function pkgname_from_tag(tagstr)
	local ext, pkgname, pkgend = '', '', ''
	for seg in tagstr:gmatch('[^,]+') do
		-- anchored: only a segment that starts with `package=` names the
		-- package; one that merely contains it is an ordinary segment
		local name = seg:match('^package=(.*)$')
		if name ~= nil then
			pkgname = name
		elseif seg == 'development' or seg == 'profile'
			or seg == 'debug' or seg == 'docs' then
			pkgend = seg
		else
			ext = ext == '' and seg or ext..'-'..seg
		end
	end
	pkgname = pkgname
		..(ext == '' and '' or '-'..ext)
		..(pkgend == '' and '' or '-'..pkgend)
	return pkgname
end
260
--- @class Analysis_session
--- Reads a METALOG file once and returns a table of closures that report on
--- the parsed entries: per-package summaries, duplicate entries and entries
--- that share an inode. Exits the process with status 1 when the file cannot
--- be opened.
--- @param metalog string path to the METALOG file
--- @param verbose boolean include more detail in the lint reports
--- @param w_notagdirs boolean turn on to also check directories
function Analysis_session(metalog, verbose, w_notagdirs)
	-- The METALOG file is assumed to be at the top of the stage directory.
	-- A path without any '/' means the stage directory is the current
	-- directory, so the prefix is empty rather than the file name itself.
	local stage_root = string.match(metalog, '^(.*/)') or ''
	local files = {} -- map<string, MetalogRow[]>
	-- set is map<elem, bool>. if bool is true then elem exists
	local pkgs = {} -- map<string, set<string>>
	----- used to keep track of files not belonging to a pkg. not used so
	----- it is commented with -----
	-----local nopkg = {} --            set<string>
	-- exported as `warn`; nothing in this function adds to it
	--- @public
	local swarn = {}
	-- exported as `errs`; nothing in this function adds to it
	--- @public
	local serrs = {}

	-- returns number of files in package and size of package
	-- nil is returned upon errors
	--- @param pkgname string
	local function pkg_size(pkgname)
		local filecount, sz = 0, 0
		for filename in pairs(pkgs[pkgname]) do
			local rows = files[filename]
			-- normally, there should be only one row per filename
			-- if these rows are equal, there should be warning, but it
			-- does not affect size counting. if not, it is an error
			if #rows > 1 and not metalogrows_all_equal(rows) then
				return nil
			end
			local row = rows[1]
			if row.attrs.type == 'file' then
				-- an absent or non-numeric size counts as 0 instead of
				-- raising an arithmetic error
				sz = sz + (tonumber(row.attrs.size) or 0)
			end
			filecount = filecount + 1
		end
		return filecount, sz
	end

	-- tells whether any entry of the package has one of the given mode
	-- bits set
	--- @param pkgname string
	--- @param mode number bit mask tested against the octal `mode` attribute
	local function pkg_ismode(pkgname, mode)
		for filename in pairs(pkgs[pkgname]) do
			for _, row in ipairs(files[filename]) do
				-- entries without a parsable mode cannot match
				local bits = row.attrs.mode and tonumber(row.attrs.mode, 8)
				if bits and bits & mode ~= 0 then
					return true
				end
			end
		end
		return false
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetuid(pkgname)
		return pkg_ismode(pkgname, 2048) -- octal 04000
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetgid(pkgname)
		return pkg_ismode(pkgname, 1024) -- octal 02000
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetid(pkgname)
		return pkg_issetuid(pkgname) or pkg_issetgid(pkgname)
	end

	-- sample return:
	-- { [*string]: { count=1, size=2, issetuid=true, issetgid=true } }
	local function pkg_report_helper_table()
		local res = {}
		for pkgname in pairs(pkgs) do
			local count, size = pkg_size(pkgname)
			res[pkgname] = {
				count = count,
				size = size,
				issetuid = pkg_issetuid(pkgname),
				issetgid = pkg_issetgid(pkgname),
			}
		end
		return res
	end

	-- returns a string describing package scan report
	--- @public
	local function pkg_report_full()
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			sb[#sb+1] = 'Package '..pkgname..':'
			if v.issetuid or v.issetgid then
				sb[#sb+1] = ''..table.concat({
					v.issetuid and ' setuid' or '',
					v.issetgid and ' setgid' or '' }, '')
			end
			-- '?' stands in for values pkg_size could not compute
			sb[#sb+1] = '\n  number of files: '..(v.count or '?')
				..'\n  total size: '..(v.size or '?')
			sb[#sb+1] = '\n'
		end
		return table.concat(sb, '')
	end

	--- @param have_count boolean
	--- @param have_size boolean
	--- @param filters function[]
	--- @public
	-- returns a string describing package size report.
	-- sample: "mypackage 2 2048"* if both booleans are true
	local function pkg_report_simple(have_count, have_size, filters)
		filters = filters or {}
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			local pred = true
			-- doing a foldl to all the function results with (and)
			-- pairs() is used because the filter table may contain holes
			for _, f in pairs(filters) do pred = pred and f(pkgname) end
			if pred then
				sb[#sb+1] = pkgname..table.concat({
					have_count and (' '..(v.count or '?')) or '',
					have_size and (' '..(v.size or '?')) or ''}, '')
					..'\n'
			end
		end
		return table.concat(sb, '')
	end

	-- returns a string describing duplicate file warnings,
	-- returns a string describing duplicate file errors
	--- @public
	local function dup_report()
		local warn, errs = {}, {}
		local function linenum_of(e) return e.linenum end
		for filename, rows in sortedPairs(files) do
			if #rows == 1 then goto continue end
			if metalogrows_all_equal(rows) then
				-- repeated line: a warning (verbose only) for directories,
				-- an error for everything else
				local dupmsg = filename .. ' ' ..
				    (rows[1].attrs.type or '?') ..
				    ' repeated with same meta: line ' ..
				    table.concat(table_map(rows, linenum_of), ',')
				if rows[1].attrs.type == "dir" then
					if verbose then
						warn[#warn+1] = 'warning: ' .. dupmsg .. '\n'
					end
				else
					errs[#errs+1] = 'error: ' .. dupmsg .. '\n'
				end
			else
				-- take the differing field from the comparison that
				-- ignores tags, so the message never blames 'tags' when
				-- the real mismatch is elsewhere
				local samemeta, offby =
				    metalogrows_all_equal(rows, false, true)
				if not samemeta then
				-- same filename (possibly different tags), different metadata, an error
					errs[#errs+1] = 'error: '..filename
						..' exists in multiple locations and with different meta: line '
						..table.concat(table_map(rows, linenum_of), ',')
						..'. off by "'..offby..'"'
					errs[#errs+1] = '\n'
				end
			end
			::continue::
		end
		return table.concat(warn, ''), table.concat(errs, '')
	end

	-- returns a string describing warnings of found hard links
	-- returns a string describing errors of found hard links
	--- @public
	local function inode_report()
		-- obtain inodes of filenames
		local attributes = require('lfs').attributes
		local inm = {} -- map<number, string[]>
		local unstatables = {} -- string[]
		for filename in pairs(files) do
			-- i only took the first row of a filename,
			-- and skip links and folders
			if files[filename][1].attrs.type ~= 'file' then
				goto continue
			end
			local fs = attributes(stage_root .. filename)
			if fs == nil then
				unstatables[#unstatables+1] = filename
				goto continue
			end
			local inode = fs.ino
			inm[inode] = inm[inode] or {}
			table.insert(inm[inode], filename)
			::continue::
		end

		local warn, errs = {}, {}
		for _, filenames in pairs(inm) do
			if #filenames == 1 then goto continue end
			-- i only took the first row of a filename
			local rows = table_map(filenames, function(e)
				return files[e][1]
			end)
			-- names and tags may differ between entries sharing an inode;
			-- every other attribute has to agree
			local iseq, offby = metalogrows_all_equal(rows, true, true)
			if not iseq then
				errs[#errs+1] = 'error: '
					..'entries point to the same inode but have different meta: '
					..table.concat(filenames, ',')..' in line '
					..table.concat(
						table_map(rows, function(e) return e.linenum end), ',')
					..'. off by "'..offby..'"'
				errs[#errs+1] = '\n'
			end
			::continue::
		end

		if #unstatables > 0 then
			warn[#warn+1] = verbose and
				'note: skipped checking inodes: '..table.concat(unstatables, ',')..'\n'
				or
				'note: skipped checking inodes for '..#unstatables..' entries\n'
		end

		return table.concat(warn, ''), table.concat(errs, '')
	end

	do
		local fp, errmsg, errcode = io.open(metalog, 'r')
		if fp == nil then
			io.stderr:write('cannot open '..metalog..': '..errmsg..': '..errcode..'\n')
			os.exit(1)
		end

		-- scan all lines and put file data into the dictionaries
		local firsttimes = {} -- set<string>
		local lineno = 0
		for line in fp:lines() do
			-----local isinpkg = false
			lineno = lineno + 1
			-- skip lines beginning with #
			if line:match('^%s*#') then goto continue end
			-- skip blank lines
			if line:match('^%s*$') then goto continue end

			local data = MetalogRow(line, lineno)
			-- entries with dir and no tags... ignore for the first time
			if not w_notagdirs and
				data.attrs.tags == nil and data.attrs.type == 'dir'
				and not firsttimes[data.filename] then
				firsttimes[data.filename] = true
				goto continue
			end

			files[data.filename] = files[data.filename] or {}
			table.insert(files[data.filename], data)

			if data.attrs.tags ~= nil then
				-- local: keep the name out of the global environment
				local pkgname = pkgname_from_tag(data.attrs.tags)
				pkgs[pkgname] = pkgs[pkgname] or {}
				pkgs[pkgname][data.filename] = true
				------isinpkg = true
			end
			-----if not isinpkg then nopkg[data.filename] = true end
			::continue::
		end

		fp:close()
	end

	return {
		warn = swarn,
		errs = serrs,
		pkg_issetuid = pkg_issetuid,
		pkg_issetgid = pkg_issetgid,
		pkg_issetid = pkg_issetid,
		pkg_report_full = pkg_report_full,
		pkg_report_simple = pkg_report_simple,
		dup_report = dup_report,
		inode_report = inode_report
	}
end
533
-- Run the script on the command-line arguments; main() returns 1 when the
-- lint pass found errors, and that value becomes the process exit status.
os.exit(main(arg))
535