xref: /freebsd/tools/pkgbase/metalog_reader.lua (revision 7543a9c0280a0f4262489671936a6e03b9b2c563)
1#!/usr/libexec/flua
2
3-- SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4--
5-- Copyright(c) 2020 The FreeBSD Foundation.
6--
7-- Redistribution and use in source and binary forms, with or without
8-- modification, are permitted provided that the following conditions
9-- are met:
10-- 1. Redistributions of source code must retain the above copyright
11--    notice, this list of conditions and the following disclaimer.
12-- 2. Redistributions in binary form must reproduce the above copyright
13--    notice, this list of conditions and the following disclaimer in the
14--    documentation and/or other materials provided with the distribution.
15--
16-- THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18-- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19-- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20-- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24-- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25-- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26-- SUCH DAMAGE.
27
28-- $FreeBSD$
29
--- Script entry point: parses the command line, loads the METALOG file and
--- prints the selected report.
-- Recognised flags are -h, -a, -c, -p (optionally followed directly by any of
-- -count, -size, -fsetuid, -fsetgid, -fsetid), -v and -Wcheck-notagdir.  Any
-- other argument starting with '-' is rejected through usage(); the last
-- argument that is not a flag is used as the METALOG path.
-- When none of -a, -c or -p is given the full report (-a) is printed, also
-- when modifier flags such as -v are present.
--- @param args string[] command-line arguments
function main(args)
	if #args == 0 then usage() end
	local filename
	local printall, checkonly, pkgonly = false, false, false
	local dcount, dsize, fuid, fgid, fid =
	    false, false, false, false, false
	local verbose = false
	local w_notagdirs = false

	local i = 1
	while i <= #args do
		local a = args[i]
		if a == '-h' then
			usage(true)
		elseif a == '-a' then
			printall = true
		elseif a == '-c' then
			printall = false
			checkonly = true
		elseif a == '-p' then
			printall = false
			pkgonly = true
			-- consume the sub-options that directly follow -p; stop at
			-- the first argument that is not one of them so the outer
			-- loop handles it
			while i < #args do
				local sub = args[i+1]
				if sub == '-count' then
					dcount = true
				elseif sub == '-size' then
					dsize = true
				elseif sub == '-fsetuid' then
					fuid = true
				elseif sub == '-fsetgid' then
					fgid = true
				elseif sub == '-fsetid' then
					fid = true
				else
					break
				end
				i = i+1
			end
		elseif a == '-v' then
			verbose = true
		elseif a == '-Wcheck-notagdir' then
			w_notagdirs = true
		elseif a:match('^%-') then
			io.stderr:write('Unknown argument '..a..'.\n')
			usage()
		else
			filename = a
		end
		i = i+1
	end

	if filename == nil then
		io.stderr:write('Missing filename.\n')
		usage()
	end

	-- -a is the default mode.  Modifier flags (-v, -Wcheck-notagdir) do not
	-- select a mode, so fall back to -a whenever nothing else was chosen.
	if not (printall or checkonly or pkgonly) then
		printall = true
	end

	local sess = Analysis_session(filename, verbose, w_notagdirs)

	if printall then
		io.write('--- PACKAGE REPORTS ---\n')
		io.write(sess.pkg_report_full())
		io.write('--- LINTING REPORTS ---\n')
		print_lints(sess)
	elseif checkonly then
		print_lints(sess)
	else
		-- pkgonly: unset filters are left as nil holes, the report
		-- function walks the table with pairs()
		io.write(sess.pkg_report_simple(dcount, dsize, {
			fuid and sess.pkg_issetuid or nil,
			fgid and sess.pkg_issetgid or nil,
			fid and sess.pkg_issetid or nil
		}))
	end
end
107
--- Prints the usage synopsis and terminates the script.
-- With `man` set, a blank line, the synopsis and the option help are written
-- to stdout and the script ends through os.exit() with no explicit status.
-- Otherwise only the synopsis is written to stderr and the exit status is 1.
--- @param man boolean print the long help instead of the short synopsis
function usage(man)
	local synopsis = 'Usage: '..arg[0].. ' [-h] [-a | -c | -p [-count] [-size] [-f...]] [-W...] metalog-path \n'
	if not man then
		io.stderr:write(synopsis)
		os.exit(1)
	end
	io.write('\n', synopsis,
[[

The script reads METALOG file created by pkgbase (make packages) and generates
reports about the installed system and issues.  It accepts an mtree file in a
format that's returned by `mtree -c | mtree -C`

  Options:
  -a         prints all scan results. this is the default option if no option
             is provided.
  -c         lints the file and gives warnings/errors, including duplication
             and conflicting metadata
      -Wcheck-notagdir    entries with dir type and no tags will be also
                          included the first time they appear
  -p         list all package names found in the file as exactly specified by
             `tags=package=...`
      -count       display the number of files of the package
      -size        display the size of the package
      -fsetgid     only include packages with setgid files
      -fsetuid     only include packages with setuid files
      -fsetid      only include packages with setgid or setuid files
  -v          verbose mode
  -h          help page

]])
	os.exit()
end
145
--- Writes the lint output of a session to stdout: the duplicate-entry
--- warnings and errors first, then the inode warnings and errors.
--- @param sess Analysis_session
function print_lints(sess)
	local dup_warnings, dup_errors = sess.dup_report()
	io.write(dup_warnings, dup_errors)
	local inode_warnings, inode_errors = sess.inode_report()
	io.write(inode_warnings, inode_errors)
end
155
--- Returns an iterator over `t` that yields key/value pairs in the order
--- table.sort() puts the keys in.
-- The key list is collected once, when sortedPairs is called; the values are
-- looked up in `t` as the iteration advances.
--- @param t table
function sortedPairs(t)
	local keys = {}
	local key = next(t)
	while key ~= nil do
		keys[#keys+1] = key
		key = next(t, key)
	end
	table.sort(keys)
	local pos = 0
	return function()
		pos = pos + 1
		local current = keys[pos]
		-- past the end `current` is nil, which ends a generic for loop
		return current, t[current]
	end
end
167
--- Builds a new table holding f(value) under every key of `t`.
-- `t` itself is not modified.
--- @param t table <T, U>
--- @param f function <U -> U>
function table_map(t, f)
	local mapped = {}
	for key, value in pairs(t) do
		local converted = f(value)
		mapped[key] = converted
	end
	return mapped
end
175
--- @class MetalogRow
-- a table containing a file's info, parsed from one entry line of a METALOG
-- file.  filename and all attribute values are strings, linenum is whatever
-- was passed as lineno
-- sample output:
--	{
--		filename = ./usr/share/man/man3/inet6_rthdr_segments.3.gz
--		linenum = 5
--		attrs = {
--			gname = 'wheel'
--			uname = 'root'
--			mode = '0444'
--			size = '1166'
--			time = nil
--			type = 'file'
--			tags = 'package=clibs,debug'
--		}
--	}
-- The first whitespace-delimited word of the line is the file name, every
-- following word of the form key=value becomes an attribute (split at the
-- first '=').  Words without '=' or with an empty value are ignored, and an
-- entry that has no attributes at all yields an empty attrs table.
-- Raises an error if the line contains no file name.
--- @param line string
--- @param lineno number line number of the entry, stored as linenum
function MetalogRow(line, lineno)
	local filename, rest = line:match('^%s*(%S+)%s*(.*)$')
	if filename == nil then
		error('METALOG line '..tostring(lineno)..' has no file name', 2)
	end
	local attrs = {}
	-- mtree file has space escaped as '\\040', not affecting splitting
	-- string by whitespace
	for attrpair in rest:gmatch('%S+') do
		local k, v = attrpair:match('^(.-)=(.+)')
		-- a word that is not key=value would otherwise be stored under
		-- a nil key, which is an error in Lua
		if k ~= nil then
			attrs[k] = v
		end
	end
	return { filename = filename, linenum = lineno, attrs = attrs }
end
208
-- check if an array of MetalogRows are equivalent, every row being compared
-- against the first one.  returns true if so; otherwise returns false and,
-- secondly, the name of a field that differs
-- A field only counts as different when both rows carry it with different
-- values; an attribute missing from either row is not a difference.
-- linenum is never compared as it's not relevant.
-- When a row differs in several attributes, the lexicographically smallest
-- attribute name is reported so the result does not depend on table
-- iteration order.  A filename mismatch is reported as 'filename' before any
-- attribute is looked at.
--- @param rows MetalogRow[]
--- @param ignore_name boolean do not compare filenames
--- @param ignore_tags boolean do not compare the 'tags' attribute
function metalogrows_all_equal(rows, ignore_name, ignore_tags)
	local first = rows[1]
	-- rows[1] always equals itself, start with the second row
	for i = 2, #rows do
		local row = rows[i]
		if not ignore_name and row.filename ~= first.filename then
			return false, 'filename'
		end
		local offby
		for k, v in pairs(row.attrs) do
			local ref = first.attrs[k]
			if not (ignore_tags and k == 'tags')
			    and ref ~= nil and v ~= ref
			    and (offby == nil or k < offby) then
				offby = k
			end
		end
		if offby ~= nil then
			return false, offby
		end
	end
	return true
end
235
--- Derives a package name from the value of a `tags=` attribute.
-- The value is a comma separated list.  The segment starting with
-- 'package=' supplies the base name (the last one wins if it is repeated).
-- A segment equal to 'development', 'profile', 'debug' or 'docs' is kept as
-- the trailing suffix (again the last one wins).  Every other segment is
-- inserted between base name and suffix, in order of appearance.  All parts
-- are joined with '-'.
-- e.g. 'package=clibs,debug' gives 'clibs-debug'
--- @param tagstr string
function pkgname_from_tag(tagstr)
	local pkgname, pkgend = '', ''
	local ext = {}
	for seg in tagstr:gmatch('[^,]+') do
		-- anchored, so that only a real 'package=' prefix is stripped and
		-- a tag merely containing that text is kept whole
		local base = seg:match('^package=(.*)$')
		if base ~= nil then
			pkgname = base
		elseif seg == 'development' or seg == 'profile'
			or seg == 'debug' or seg == 'docs' then
			pkgend = seg
		else
			ext[#ext+1] = seg
		end
	end
	if #ext > 0 then
		pkgname = pkgname..'-'..table.concat(ext, '-')
	end
	if pkgend ~= '' then
		pkgname = pkgname..'-'..pkgend
	end
	return pkgname
end
254
--- @class Analysis_session
-- Reads the whole METALOG file once and returns a table of report functions
-- working on the loaded data:
--   pkg_issetuid, pkg_issetgid, pkg_issetid   (pkgname) -> boolean
--   pkg_report_full()                         -> string
--   pkg_report_simple(have_count, have_size, filters) -> string
--   dup_report()                              -> warnings, errors (strings)
--   inode_report()                            -> warnings, errors (strings)
-- plus the (currently never filled) tables `warn` and `errs`.
-- If the file cannot be opened a message is written to stderr and the script
-- exits with status 1.
--- @param metalog string path of the METALOG file
--- @param verbose boolean list skipped entries by name in inode_report
--- @param w_notagdirs boolean turn on to also check directories
function Analysis_session(metalog, verbose, w_notagdirs)
	-- The METALOG file is assumed to be at the top of the stage directory.
	-- A path without any '/' names a file in the current directory; the
	-- prefix is then empty so that entry names are resolved relative to
	-- the current directory.
	local stage_root = metalog:match('^(.*/)') or ''
	local files = {} -- map<string, MetalogRow[]>
	-- set is map<elem, bool>. if bool is true then elem exists
	local pkgs = {} -- map<string, set<string>>
	--- @public
	local swarn = {}
	--- @public
	local serrs = {}

	-- comma separated line numbers of the given rows
	--- @param rows MetalogRow[]
	local function row_linenums(rows)
		return table.concat(
			table_map(rows, function(e) return e.linenum end), ',')
	end

	-- returns number of files in package and size of package
	-- nil is returned upon errors
	--- @param pkgname string
	local function pkg_size(pkgname)
		local filecount, sz = 0, 0
		for filename in pairs(pkgs[pkgname]) do
			local rows = files[filename]
			-- normally, there should be only one row per filename
			-- if these rows are equal, there should be warning, but it
			-- does not affect size counting. if not, it is an error
			if #rows > 1 and not metalogrows_all_equal(rows) then
				return nil
			end
			local row = rows[1]
			if row.attrs.type == 'file' then
				-- an entry without a usable size counts as 0 bytes
				sz = sz + (tonumber(row.attrs.size) or 0)
			end
			filecount = filecount + 1
		end
		return filecount, sz
	end

	-- true if any row of any file of the package has one of the bits of
	-- `mode` set in its (octal) mode attribute
	--- @param pkgname string
	--- @param mode number
	local function pkg_ismode(pkgname, mode)
		for filename in pairs(pkgs[pkgname]) do
			for _, row in ipairs(files[filename]) do
				-- entries without a mode attribute cannot match
				local modestr = row.attrs.mode
				local bits = modestr and tonumber(modestr, 8)
				if bits and (bits & mode) ~= 0 then
					return true
				end
			end
		end
		return false
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetuid(pkgname)
		return pkg_ismode(pkgname, 2048) -- 04000
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetgid(pkgname)
		return pkg_ismode(pkgname, 1024) -- 02000
	end

	--- @param pkgname string
	--- @public
	local function pkg_issetid(pkgname)
		return pkg_issetuid(pkgname) or pkg_issetgid(pkgname)
	end

	-- sample return:
	-- { [*string]: { count=1, size=2, issetuid=true, issetgid=true } }
	local function pkg_report_helper_table()
		local res = {}
		for pkgname in pairs(pkgs) do
			local count, size = pkg_size(pkgname)
			res[pkgname] = {
				count = count,
				size = size,
				issetuid = pkg_issetuid(pkgname),
				issetgid = pkg_issetgid(pkgname),
			}
		end
		return res
	end

	-- returns a string describing package scan report
	--- @public
	local function pkg_report_full()
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			sb[#sb+1] = 'Package '..pkgname..':'
			if v.issetuid then sb[#sb+1] = ' setuid' end
			if v.issetgid then sb[#sb+1] = ' setgid' end
			-- '?' stands for a value pkg_size could not determine
			sb[#sb+1] = '\n  number of files: '..(v.count or '?')
				..'\n  total size: '..(v.size or '?')
			sb[#sb+1] = '\n'
		end
		return table.concat(sb, '')
	end

	--- @param have_count boolean
	--- @param have_size boolean
	--- @param filters function[]
	--- @public
	-- returns a string describing package size report.
	-- sample: "mypackage 2 2048"* if both booleans are true
	-- a package is listed only if every filter returns true for its name
	local function pkg_report_simple(have_count, have_size, filters)
		filters = filters or {}
		local sb = {}
		for pkgname, v in sortedPairs(pkg_report_helper_table()) do
			local pred = true
			-- doing a foldl to all the function results with (and)
			-- pairs() because the filter table may contain holes
			for _, f in pairs(filters) do pred = pred and f(pkgname) end
			if pred then
				sb[#sb+1] = pkgname
					..(have_count and (' '..(v.count or '?')) or '')
					..(have_size and (' '..(v.size or '?')) or '')
					..'\n'
			end
		end
		return table.concat(sb, '')
	end

	-- returns a string describing duplicate file warnings,
	-- returns a string describing duplicate file errors
	--- @public
	local function dup_report()
		local warn, errs = {}, {}
		for filename, rows in sortedPairs(files) do
			if #rows > 1 then
				if metalogrows_all_equal(rows) then
					-- repeated line, just a warning
					warn[#warn+1] = 'warning: '..filename
						..' '..(rows[1].attrs.type or '?')
						..' repeated with same meta: line '
						..row_linenums(rows)
					warn[#warn+1] = '\n'
				else
					-- same filename (possibly different tags),
					-- different metadata, an error.  the reported
					-- field comes from the comparison that ignores
					-- tags, so it is never 'tags' itself
					local same_meta, offby =
						metalogrows_all_equal(rows, false, true)
					if not same_meta then
						errs[#errs+1] = 'error: '..filename
							..' exists in multiple locations and with different meta: line '
							..row_linenums(rows)
							..'. off by "'..offby..'"'
						errs[#errs+1] = '\n'
					end
				end
			end
		end
		return table.concat(warn, ''), table.concat(errs, '')
	end

	-- returns a string describing warnings of found hard links
	-- returns a string describing errors of found hard links
	--- @public
	local function inode_report()
		-- obtain inodes of filenames
		local attributes = require('lfs').attributes
		local inm = {} -- map<number, string[]>
		local unstatables = {} -- string[]
		for filename, rows in pairs(files) do
			-- i only took the first row of a filename,
			-- and skip links and folders
			if rows[1].attrs.type == 'file' then
				local fs = attributes(stage_root .. filename)
				if fs == nil then
					unstatables[#unstatables+1] = filename
				else
					local inode = fs.ino
					inm[inode] = inm[inode] or {}
					table.insert(inm[inode], filename)
				end
			end
		end

		local warn, errs = {}, {}
		for _, filenames in pairs(inm) do
			if #filenames > 1 then
				-- i only took the first row of a filename
				local rows = table_map(filenames, function(e)
					return files[e][1]
				end)
				local iseq, offby = metalogrows_all_equal(rows, true, true)
				if not iseq then
					errs[#errs+1] = 'error: '
						..'entries point to the same inode but have different meta: '
						..table.concat(filenames, ',')..' in line '
						..row_linenums(rows)
						..'. off by "'..offby..'"'
					errs[#errs+1] = '\n'
				end
			end
		end

		if #unstatables > 0 then
			if verbose then
				warn[#warn+1] = 'note: skipped checking inodes: '
					..table.concat(unstatables, ',')..'\n'
			else
				warn[#warn+1] = 'note: skipped checking inodes for '
					..#unstatables..' entries\n'
			end
		end

		return table.concat(warn, ''), table.concat(errs, '')
	end

	do
		local fp, errmsg, errcode = io.open(metalog, 'r')
		if fp == nil then
			io.stderr:write('cannot open '..metalog..': '..errmsg..': '..errcode..'\n')
			os.exit(1)
		end

		-- scan all lines and put file data into the dictionaries
		local firsttimes = {} -- set<string>
		local lineno = 0
		for line in fp:lines() do
			lineno = lineno + 1
			-- skip lines beginning with # and blank lines
			if not line:match('^%s*#') and not line:match('^%s*$') then
				local data = MetalogRow(line, lineno)
				if not w_notagdirs
					and data.attrs.tags == nil and data.attrs.type == 'dir'
					and not firsttimes[data.filename] then
					-- entries with dir and no tags... ignore for the
					-- first time
					firsttimes[data.filename] = true
				else
					files[data.filename] = files[data.filename] or {}
					table.insert(files[data.filename], data)

					if data.attrs.tags ~= nil then
						local pkgname = pkgname_from_tag(data.attrs.tags)
						pkgs[pkgname] = pkgs[pkgname] or {}
						pkgs[pkgname][data.filename] = true
					end
				end
			end
		end

		fp:close()
	end

	return {
		warn = swarn,
		errs = serrs,
		pkg_issetuid = pkg_issetuid,
		pkg_issetgid = pkg_issetgid,
		pkg_issetid = pkg_issetid,
		pkg_report_full = pkg_report_full,
		pkg_report_simple = pkg_report_simple,
		dup_report = dup_report,
		inode_report = inode_report
	}
end
522
523main(arg)
524