xref: /freebsd/tools/pkgbase/metalog_reader.lua (revision 4f8f43b06ed07e96a250855488cc531799d5b78f)
1#!/usr/libexec/flua
2
3-- SPDX-License-Identifier: BSD-2-Clause
4--
5-- Copyright(c) 2020 The FreeBSD Foundation.
6--
7-- Redistribution and use in source and binary forms, with or without
8-- modification, are permitted provided that the following conditions
9-- are met:
10-- 1. Redistributions of source code must retain the above copyright
11--    notice, this list of conditions and the following disclaimer.
12-- 2. Redistributions in binary form must reproduce the above copyright
13--    notice, this list of conditions and the following disclaimer in the
14--    documentation and/or other materials provided with the distribution.
15--
16-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19-- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26-- SUCH DAMAGE.
27
28
--- Command-line entry point: parses the options, analyses the METALOG file
--- and prints the requested report(s).
--
-- Exactly one report mode is in effect: -a (everything), -c (lint only) or
-- -p (package list).  When none of them is given, -a is used, as documented
-- in the help text.  The last mode option on the command line wins over -a.
--- @param args string[] command-line arguments (without the script name)
--- @return number|nil 1 when the lint step reported errors, nothing otherwise
function main(args)
	if #args == 0 then usage() end
	local filename
	local printall, checkonly, pkgonly = false, false, false
	local dcount, dsize, fuid, fgid, fid =
	    false, false, false, false, false
	local verbose = false
	local w_notagdirs = false

	local i = 1
	while i <= #args do
		local opt = args[i]
		if opt == '-h' then
			usage(true)
		elseif opt == '-a' then
			printall = true
		elseif opt == '-c' then
			printall = false
			checkonly = true
		elseif opt == '-p' then
			printall = false
			pkgonly = true
			-- consume the sub-options that directly follow -p
			while i < #args do
				local sub = args[i+1]
				if sub == '-count' then
					dcount = true
				elseif sub == '-size' then
					dsize = true
				elseif sub == '-fsetuid' then
					fuid = true
				elseif sub == '-fsetgid' then
					fgid = true
				elseif sub == '-fsetid' then
					fid = true
				else
					-- not a -p sub-option; let the outer loop handle it
					break
				end
				i = i+1
			end
		elseif opt == '-v' then
			verbose = true
		elseif opt == '-Wcheck-notagdir' then
			w_notagdirs = true
		elseif opt:match('^%-') then
			io.stderr:write('Unknown argument '..opt..'.\n')
			usage()
		else
			filename = opt
		end
		i = i+1
	end

	if filename == nil then
		io.stderr:write('Missing filename.\n')
		usage()
	end

	-- -a is the default whenever no mode option was given, including when
	-- only modifiers such as -v or -Wcheck-notagdir are present
	if not (printall or checkonly or pkgonly) then
		printall = true
	end

	local sess = Analysis_session(filename, verbose, w_notagdirs)

	local errors
	if printall then
		io.write('--- PACKAGE REPORTS ---\n')
		io.write(sess.pkg_report_full())
		io.write('--- LINTING REPORTS ---\n')
		errors = print_lints(sess)
	elseif checkonly then
		errors = print_lints(sess)
	else
		-- pkgonly: collect the requested filters as a proper sequence
		local filters = {}
		if fuid then filters[#filters+1] = sess.pkg_issetuid end
		if fgid then filters[#filters+1] = sess.pkg_issetgid end
		if fid then filters[#filters+1] = sess.pkg_issetid end
		io.write(sess.pkg_report_simple(dcount, dsize, filters))
	end

	if errors then
		return 1
	end
end
111
--- Prints the usage synopsis and terminates the script.
-- With `man` set, the full help page goes to stdout and the script exits
-- with the default status; otherwise only the synopsis is written to stderr
-- and the exit status is 1.
--- @param man boolean
function usage(man)
	local synopsis = 'Usage: '..arg[0].. ' [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] metalog-path \n'

	if not man then
		io.stderr:write(synopsis)
		os.exit(1)
	end

	io.write('\n', synopsis,
[[

The script reads METALOG file created by pkgbase (make packages) and generates
reports about the installed system and issues.  It accepts an mtree file in a
format that's returned by `mtree -c | mtree -C`

  Options:
  -a         prints all scan results. this is the default option if no option
             is provided.
  -c         lints the file and gives warnings/errors, including duplication
             and conflicting metadata
      -Wcheck-notagdir    entries with dir type and no tags will be also
                          included the first time they appear
  -p         list all package names found in the file as exactly specified by
             `tags=package=...`
      -count       display the number of files of the package
      -size        display the size of the package
      -fsetgid     only include packages with setgid files
      -fsetuid     only include packages with setuid files
      -fsetid      only include packages with setgid or setuid files
  -v          verbose mode
  -h          help page

]])
	os.exit()
end
149
--- Writes the duplicate-entry report followed by the inode report to stdout.
-- Each report yields a warning string and an error string; both are printed.
--- @param sess Analysis_session
--- @return boolean true when either report produced a non-empty error string
function print_lints(sess)
	local dup_warnings, dup_errors = sess.dup_report()
	io.write(dup_warnings, dup_errors)

	local inode_warnings, inode_errors = sess.inode_report()
	io.write(inode_warnings, inode_errors)

	local has_dup_errors = #dup_errors > 0
	return has_dup_errors or #inode_errors > 0
end
160
--- Iterator factory that walks a table in ascending key order.
-- The keys are snapshotted and sorted with table.sort when the iterator is
-- created; each step yields the next key and the value stored under it.
--- @param t table
function sortedPairs(t)
	local keys = {}
	local key = next(t)
	while key ~= nil do
		keys[#keys+1] = key
		key = next(t, key)
	end
	table.sort(keys)

	local pos = 0
	return function()
		pos = pos + 1
		local current = keys[pos]
		return current, t[current]
	end
end
172
--- Builds a new table holding f(value) under every key of t.
-- The input table is left untouched.
--- @param t table <T, U>
--- @param f function <U -> U>
function table_map(t, f)
	local mapped = {}
	for key, value in pairs(t) do
		local result = f(value)
		mapped[key] = result
	end
	return mapped
end
180
--- @class MetalogRow
-- a table containing a file's info, parsed from one line of a METALOG file.
-- filename and all attribute values are strings; linenum is whatever the
-- caller passed as lineno
-- sample output:
--	{
--		filename = ./usr/share/man/man3/inet6_rthdr_segments.3.gz
--		linenum = 5
--		attrs = {
--			gname = 'wheel'
--			uname = 'root'
--			mode = '0444'
--			size = '1166'
--			time = nil
--			type = 'file'
--			tags = 'package=clibs,debug'
--		}
--	}
-- The first whitespace-delimited word is the file name; every following word
-- of the form key=value becomes an attribute (the key ends at the first '=').
-- Words that are not key=value pairs are ignored, and a line carrying only a
-- file name yields an empty attrs table.  filename is nil for a blank line.
--- @param line string
--- @param lineno number line number of `line` in the METALOG file
function MetalogRow(line, lineno)
	local attrs = {}
	local filename, rest = line:match('^%s*(%S+)(.*)$')
	-- mtree file has space escaped as '\\040', not affecting splitting
	-- string by whitespace
	for attrpair in (rest or ''):gmatch('%S+') do
		local k, v = attrpair:match('^(.-)=(.+)')
		-- skip malformed tokens instead of indexing attrs with nil
		if k ~= nil then
			attrs[k] = v
		end
	end
	return {
		filename = filename,
		linenum = lineno,
		attrs = attrs,
	}
end
213
-- Tells whether every MetalogRow in `rows` is equivalent to the first one.
-- Returns true when they all are; otherwise returns false followed by the
-- name of the first field found to differ ('filename' or an attribute key).
-- An attribute missing from the first row is not treated as a difference,
-- and linenum is never compared.
--- @param rows MetalogRow[]
--- @param ignore_name boolean skip the filename comparison
--- @param ignore_tags boolean skip the 'tags' attribute
function metalogrows_all_equal(rows, ignore_name, ignore_tags)
	-- yields the differing field name, or nil when `row` matches `ref`
	local function first_difference(row, ref)
		if not ignore_name and row.filename ~= ref.filename then
			return 'filename'
		end
		for key in pairs(row.attrs) do
			local skipped = ignore_tags and key == 'tags'
			if not skipped then
				local expected = ref.attrs[key]
				if expected ~= nil and row.attrs[key] ~= expected then
					return key
				end
			end
		end
		return nil
	end

	local reference = rows[1]
	for _, row in ipairs(rows) do
		local field = first_difference(row, reference)
		if field ~= nil then
			return false, field
		end
	end
	return true
end
240
--- Derives a package name from the comma-separated value of a `tags=`
--- attribute.
-- The result is the value of the `package=` tag, followed by every other
-- tag joined with '-', followed by the last of the special suffix tags
-- (development, profile, debug, docs) when one is present.
-- Only tags that start with `package=` name the package; the prefix is
-- anchored so that a tag merely containing that text is kept as an ordinary
-- tag instead of being sliced at a fixed offset.
--- @param tagstr string e.g. 'package=clibs,debug'
--- @return string e.g. 'clibs-debug'
function pkgname_from_tag(tagstr)
	local ext, pkgname, pkgend = '', '', ''
	for seg in tagstr:gmatch('[^,]+') do
		local name = seg:match('^package=(.*)$')
		if name then
			pkgname = name
		elseif seg == 'development' or seg == 'profile'
			or seg == 'debug' or seg == 'docs' then
			pkgend = seg
		else
			ext = ext == '' and seg or ext..'-'..seg
		end
	end
	pkgname = pkgname
		..(ext == '' and '' or '-'..ext)
		..(pkgend == '' and '' or '-'..pkgend)
	return pkgname
end
259
--- @class Analysis_session
-- Reads the METALOG file once and returns a table of report functions that
-- operate on the parsed entries.  The script exits with status 1 when the
-- file cannot be opened.
--- @param metalog string path to the METALOG file
--- @param verbose boolean include more detail in the reports
--- @param w_notagdirs boolean turn on to also check directories
function Analysis_session(metalog, verbose, w_notagdirs)
	-- The METALOG file is assumed to be at the top of the stage directory.
	-- A path without any '/' refers to the current directory.
	local stage_root = metalog:match('^(.*/)[^/]*$') or './'
	local files = {} -- map<string, MetalogRow[]>
	-- set is map<elem, bool>. if bool is true then elem exists
	local pkgs = {} -- map<string, set<string>>
	--- @public
	local swarn = {}
	--- @public
	local serrs = {}

	-- mode bits tested by the setuid/setgid filters
	local S_ISUID = 2048 -- octal 04000
	local S_ISGID = 1024 -- octal 02000

	-- returns number of files in package and size of package
	-- nil is returned upon errors
	--- @param pkgname string
	local function pkg_size(pkgname)
		local filecount, sz = 0, 0
		for filename in pairs(pkgs[pkgname]) do
			local rows = files[filename]
			-- normally, there should be only one row per filename
			-- if these rows are equal, there should be warning, but it
			-- does not affect size counting. if not, it is an error
			if #rows > 1 and not metalogrows_all_equal(rows) then
				return nil
			end
			local row = rows[1]
			if row.attrs.type == 'file' then
				-- an entry without a usable size= counts as 0 bytes
				sz = sz + (tonumber(row.attrs.size) or 0)
			end
			filecount = filecount + 1
		end
		return filecount, sz
	end

	-- true when any entry of the package has one of the bits of `mode` set
	--- @param pkgname string
	--- @param mode number
	local function pkg_ismode(pkgname, mode)
		for filename in pairs(pkgs[pkgname]) do
			for _, row in ipairs(files[filename]) do
				-- entries without a parsable octal mode= are skipped
				local bits = row.attrs.mode
					and tonumber(row.attrs.mode, 8)
				if bits and (bits & mode) ~= 0 then
					return true
				end
			end
		end
		return false
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetuid(pkgname)
		return pkg_ismode(pkgname, S_ISUID)
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetgid(pkgname)
		return pkg_ismode(pkgname, S_ISGID)
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetid(pkgname)
		return pkg_issetuid(pkgname) or pkg_issetgid(pkgname)
	end

	-- sample return:
	-- { [*string]: { count=1, size=2, issetuid=true, issetgid=true } }
	local function pkg_report_helper_table()
		local res = {}
		for pkgname in pairs(pkgs) do
			local count, size = pkg_size(pkgname)
			res[pkgname] = {
				count = count,
				size = size,
				issetuid = pkg_issetuid(pkgname),
				issetgid = pkg_issetgid(pkgname),
			}
		end
		return res
	end

	-- returns a string describing package scan report
	--- @public
	local function pkg_report_full()
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			sb[#sb+1] = 'Package '..pkgname..':'
			if v.issetuid or v.issetgid then
				sb[#sb+1] = (v.issetuid and ' setuid' or '')
					..(v.issetgid and ' setgid' or '')
			end
			-- '?' stands for a value pkg_size could not determine
			sb[#sb+1] = '\n  number of files: '..(v.count or '?')
				..'\n  total size: '..(v.size or '?')
			sb[#sb+1] = '\n'
		end
		return table.concat(sb, '')
	end

	--- @param have_count boolean
	--- @param have_size boolean
	--- @param filters function[]
	--- @public
	-- returns a string describing package size report.
	-- sample: "mypackage 2 2048"* if both booleans are true
	local function pkg_report_simple(have_count, have_size, filters)
		filters = filters or {}
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			local pred = true
			-- doing a foldl to all the function results with (and)
			-- pairs() is used so that a filter list with holes still works
			for _, f in pairs(filters) do pred = pred and f(pkgname) end
			if pred then
				sb[#sb+1] = pkgname
					..(have_count and (' '..(v.count or '?')) or '')
					..(have_size and (' '..(v.size or '?')) or '')
					..'\n'
			end
		end
		return table.concat(sb, '')
	end

	-- returns a string describing duplicate file warnings,
	-- returns a string describing duplicate file errors
	--- @public
	local function dup_report()
		local warn, errs = {}, {}
		for filename, rows in sortedPairs(files) do
			if #rows > 1 then
				local linenums = table.concat(
					table_map(rows, function(e) return e.linenum end), ',')
				if metalogrows_all_equal(rows) then
					-- repeated line; directories only warn (in verbose
					-- mode), anything else is reported as an error
					local ftype = rows[1].attrs.type or '?'
					local dupmsg = filename .. ' ' .. ftype ..
					    ' repeated with same meta: line ' .. linenums
					if ftype == 'dir' then
						if verbose then
							warn[#warn+1] = 'warning: ' .. dupmsg .. '\n'
						end
					else
						errs[#errs+1] = 'error: ' .. dupmsg .. '\n'
					end
				else
					-- same filename (possibly different tags), different
					-- metadata, an error.  The differing field is taken
					-- from this tag-insensitive comparison so that it
					-- names the metadata that really conflicts.
					local same_meta, offby =
					    metalogrows_all_equal(rows, false, true)
					if not same_meta then
						errs[#errs+1] = 'error: '..filename
							..' exists in multiple locations and with different meta: line '
							..linenums
							..'. off by "'..offby..'"'
						errs[#errs+1] = '\n'
					end
				end
			end
		end
		return table.concat(warn, ''), table.concat(errs, '')
	end

	-- returns a string describing warnings of found hard links
	-- returns a string describing errors of found hard links
	--- @public
	local function inode_report()
		-- obtain inodes of filenames
		local attributes = require('lfs').attributes
		local inm = {} -- map<number, string[]>
		local unstatables = {} -- string[]
		-- sorted traversal keeps the report order stable between runs
		for filename, rows in sortedPairs(files) do
			-- i only took the first row of a filename,
			-- and skip links and folders
			if rows[1].attrs.type == 'file' then
				local fs = attributes(stage_root .. filename)
				if fs == nil then
					unstatables[#unstatables+1] = filename
				else
					local inode = fs.ino
					inm[inode] = inm[inode] or {}
					table.insert(inm[inode], filename)
				end
			end
		end

		local warn, errs = {}, {}
		for _, filenames in sortedPairs(inm) do
			if #filenames > 1 then
				-- i only took the first row of a filename
				local rows = table_map(filenames, function(e)
					return files[e][1]
				end)
				local iseq, offby = metalogrows_all_equal(rows, true, true)
				if not iseq then
					errs[#errs+1] = 'error: '
						..'entries point to the same inode but have different meta: '
						..table.concat(filenames, ',')..' in line '
						..table.concat(
							table_map(rows, function(e) return e.linenum end), ',')
						..'. off by "'..offby..'"'
					errs[#errs+1] = '\n'
				end
			end
		end

		if #unstatables > 0 then
			warn[#warn+1] = verbose and
				'note: skipped checking inodes: '..table.concat(unstatables, ',')..'\n'
				or
				'note: skipped checking inodes for '..#unstatables..' entries\n'
		end

		return table.concat(warn, ''), table.concat(errs, '')
	end

	do
	local fp, errmsg, errcode = io.open(metalog, 'r')
	if fp == nil then
		io.stderr:write('cannot open '..metalog..': '..errmsg..': '..errcode..'\n')
		os.exit(1)
	end

	-- scan all lines and put file data into the dictionaries
	local firsttimes = {} -- set<string>
	local lineno = 0
	for line in fp:lines() do
		lineno = lineno + 1
		-- skip lines beginning with # and blank lines
		if not line:match('^%s*#') and not line:match('^%s*$') then
			local data = MetalogRow(line, lineno)
			if not w_notagdirs and
				data.attrs.tags == nil and data.attrs.type == 'dir'
				and not firsttimes[data.filename] then
				-- entries with dir and no tags... ignore for the first time
				firsttimes[data.filename] = true
			else
				files[data.filename] = files[data.filename] or {}
				table.insert(files[data.filename], data)

				if data.attrs.tags ~= nil then
					local pkgname = pkgname_from_tag(data.attrs.tags)
					pkgs[pkgname] = pkgs[pkgname] or {}
					pkgs[pkgname][data.filename] = true
				end
			end
		end
	end

	fp:close()
	end

	return {
		warn = swarn,
		errs = serrs,
		pkg_issetuid = pkg_issetuid,
		pkg_issetgid = pkg_issetgid,
		pkg_issetid = pkg_issetid,
		pkg_report_full = pkg_report_full,
		pkg_report_simple = pkg_report_simple,
		dup_report = dup_report,
		inode_report = inode_report
	}
end
532
-- Script entry point: run main() on the command-line arguments and use its
-- return value (1 when lint errors were reported) as the exit status.
os.exit(main(arg))
534