xref: /titanic_50/usr/src/lib/libshell/common/scripts/simplefileattributetree1.sh (revision 392e836b07e8da771953e4d64233b2abe4393efe)
1#!/usr/bin/ksh93
2
3#
4# CDDL HEADER START
5#
6# The contents of this file are subject to the terms of the
7# Common Development and Distribution License (the "License").
8# You may not use this file except in compliance with the License.
9#
10# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11# or http://www.opensolaris.org/os/licensing.
12# See the License for the specific language governing permissions
13# and limitations under the License.
14#
15# When distributing Covered Code, include this CDDL HEADER in each
16# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17# If applicable, add the following below this CDDL HEADER, with the
18# fields enclosed by brackets "[]" replaced with your own identifying
19# information: Portions Copyright [yyyy] [name of copyright owner]
20#
21# CDDL HEADER END
22#
23
24#
25# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
26#
27
28#
29# simplefileattributetree1 - build a simple file tree (including file attributes)
30#
31
# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# Make sure all math stuff runs in the "C" locale to avoid problems
# with alternative radix point representations (e.g. ',' instead of
# '.' in de_DE.*-locales). This needs to be set _before_ any
# floating-point constants are defined in this script.
#
# LC_ALL takes precedence over every individual LC_* variable, so it
# has to be unset before LC_NUMERIC=C can take effect. To keep the
# caller's locale for everything else, its value is copied into the
# individual categories first. LC_TIME is part of that list; leaving it
# out would make date/time formatting silently fall back to ${LANG}.
if [[ "${LC_ALL:-}" != "" ]] ; then
    export \
        LC_MONETARY="${LC_ALL}" \
        LC_MESSAGES="${LC_ALL}" \
        LC_COLLATE="${LC_ALL}" \
        LC_CTYPE="${LC_ALL}" \
        LC_TIME="${LC_ALL}"
    unset LC_ALL
fi
export LC_NUMERIC=C
48
49
#
# add_file_to_tree - create the tree node for one file
#
# Arguments:
#	$1 - name of the compound variable which is the root of the tree
#	$2 - path of the file (split at '/' into path elements)
#	$3 - name of a variable which receives the full variable name
#	     of the newly created leaf node
#
# Every path element except the last one becomes a key in the "nodes"
# associative array of its parent, the last one becomes a key in the
# "elements" associative array of the innermost node. The leaf is
# initialised with a "filepath" member holding $2.
# Always returns 0.
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer idx
	typeset curpath		# variable name of the node reached so far
	typeset -a parts	# path elements of "filename"

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	parts+=( ${filename} )

	# an absolute path yields an empty first element, use '/' as its name
	if [[ ${parts[0]} == '' ]] ; then
		parts[0]='/'
	fi

	# descend along all directory elements (everything except the last
	# element) and create the "nodes" container where it is missing
	curpath="${treename}"
	for (( idx=0 ; idx < (${#parts[@]}-1) ; idx++ )) ; do
		nameref parent="${curpath}"

		# [[ -v ]] does not work for arrays because [[ -v ar ]]
		# is equal to [[ -v ar[0] ]]. In this case we can
		# use the output of typeset +p parent.nodes
		if [[ "${ typeset +p parent.nodes ; }" == "" ]] ; then
			compound -A parent.nodes
		fi

		curpath+=".nodes[${parts[idx]}]"
	done

	# "idx" now selects the last path element: insert it as leaf
	nameref leaf="${curpath}"
	if [[ "${ typeset +p leaf.elements ; }" == "" ]] ; then
		compound -A leaf.elements
	fi
	leaf.elements[${parts[idx]}]=(
		filepath="${filename}"
	)

	# hand the variable name of the new leaf back to the caller
	destnodename="${!leaf}.elements[${parts[idx]}]"

	return 0
}
91
#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#	$1 - name of the compound variable which receives the fields
#	     (inodenum, kbblocks, mode, numlinks, owner.user, owner.group,
#	     filesize, date, filepath)
#	$2 - the line to parse
#
# Returns 0 on success. If the line does not have the expected format
# a diagnostic is written to stderr, the variable named by $1 is left
# untouched and 1 is returned.
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"

	# find -ls on Solaris uses the following output format by default:
	#604302    3 -rw-r--r--   1 test001  users        2678 May  9 00:46 ./httpsresdump
	#
	# The line is matched exactly once; the sub-patterns are then
	# available as .sh.match[1] ... .sh.match[9]:
	#   1=inode 2=kb blocks 3=mode 4=links 5=user 6=group 7=size 8=date 9=path
	# User and group are matched as "anything but whitespace" because
	# such names may contain characters like '-', '_' or '.'.
	if [[ "${str}" == ~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([^[:space:]]+)[[:space:]]+([^[:space:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+) ]] ; then
		integer out.inodenum="${.sh.match[1]}"
		integer out.kbblocks="${.sh.match[2]}"
		typeset out.mode="${.sh.match[3]}"
		integer out.numlinks="${.sh.match[4]}"
		compound out.owner=(
			typeset user="${.sh.match[5]}"
			typeset group="${.sh.match[6]}"
		)
		integer out.filesize="${.sh.match[7]}"
		typeset out.date="${.sh.match[8]}"
		typeset out.filepath="${.sh.match[9]}"

		return 0
	fi

	# Never hand an unparsed line to the integer declarations above:
	# they evaluate their value as an arithmetic expression.
	print -u2 -f "parse_findls: unable to parse line: %s\n" "${str}"
	return 1
}
114
#
# usage - let getopts print the usage message, then terminate the
# script with exit code 2
#
# Reads the globals "progname" and "simplefileattributetree1_usage".
#
function usage
{
	# reset the option index before getopts is called again
	OPTIND=0

	# hand getopts a literal '-?' argument in place of the real command line
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'

	exit 2
}
121
# main

# load basename and dirname as shell builtins
builtin basename dirname

set -o noglob -o nounset

# root of the variable tree which is printed at the end
compound filetree

# start/stop time stamps used by the optional benchmark
compound bench=(
	float start
	float stop
)

# application settings, changed by the command line options below
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

typeset progname="${ basename "${0}" ; }"

# option description consumed by getopts (see "usage" and the loop below)
typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2010-03-27 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

# command line options: "-x" switches a setting on, "+x" switches it
# off again; everything else ends up in "usage"
while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
	case ${OPT} in
		b)
			appconfig.do_benchmarking="true"
			;;
		+b)
			appconfig.do_benchmarking="false"
			;;
		c)
			appconfig.do_record.content="true"
			;;
		+c)
			appconfig.do_record.content="false"
			;;
		t)
			appconfig.do_record.filetype="true"
			;;
		+t)
			appconfig.do_record.filetype="false"
			;;
		*)
			usage
			;;
	esac
done
shift $((OPTIND-1))
181
182
# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"

# Collect the "find -ls" output lines of _all_ <path> arguments.
# The array is declared once and extended via "+=" for each argument;
# assigning it anew inside the loop would discard the lines gathered
# for the previous arguments and only the last <path> would be used.
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done
197
198
print -u2 -f "# building tree...\n"

if ${appconfig.do_benchmarking} ; then
	(( bench.start=SECONDS ))
fi

# one tree node per line of "find -ls" output
for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# split the "find -ls" line into its fields
	parse_findls parseddata "${findls_lines[i]}"

	# create the node; its absolute variable name is returned in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename

	# copy the parsed fields into the new node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# values derived from the fields which were just merged
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} && [[ -r "${treenode.filepath}" ]] ; then
		# We use an array of compound variables here to support
		# files with holes (and later alternative streams, too)
		compound -a treenode.content
		integer blk=0

		# The loop condition creates block number "blk" and tries to
		# fill it with up to 1024 bytes; the loop ends when "read"
		# fails, which leaves one surplus block behind (removed below).
		while \
			{
				treenode.content[${blk}]=(
					typeset type="data" # (todo: add support for "holes" (sparse files))
					typeset -b bin
				)
				read -n1024 treenode.content[${blk}].bin
			} ; do
			(( blk++ ))
		done < "${treenode.filepath}"
		unset treenode.content[${blk}]

		typeset -A treenode.hashsum=(
			[md5]="$(sum -x md5 < "${treenode.filepath}")"
			[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
		)

		# we do this for internal debugging only: re-assemble the
		# stored blocks and compare their hash with the file's hash
		if [[ "${ {
				integer k
				for (( k=0 ; k < ${#treenode.content[@]} ; k++ )) ; do
					printf "%B" treenode.content[$k].bin
				done
			} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
			# this should never happen...
			print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
			unset treenode.content treenode.hashsum
		fi
	fi
done

if ${appconfig.do_benchmarking} ; then
	(( bench.stop=SECONDS ))

	# print benchmark data
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.
277