#!/usr/bin/ksh93

#
# xref: /titanic_41/usr/src/lib/libshell/common/scripts/simplefileattributetree1.sh (revision 835ee2195df075073d2670eb95a6eab413d6c789)
#

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin
export PATH

# All math has to run in the "C" locale, otherwise locales which use a
# different radix point (e.g. ',' instead of '.' in de_DE.*-locales)
# cause problems. A non-empty LC_ALL is therefore copied into the
# individual categories listed below and then removed, and LC_NUMERIC
# is pinned to "C". This must be done _before_ any floating-point
# constants are defined in this script.
if [[ -n "${LC_ALL}" ]] ; then
	export LC_MONETARY="${LC_ALL}"
	export LC_MESSAGES="${LC_ALL}"
	export LC_COLLATE="${LC_ALL}"
	export LC_CTYPE="${LC_ALL}"
	unset LC_ALL
fi
LC_NUMERIC=C
export LC_NUMERIC


#
# add_file_to_tree - create the tree nodes leading to a file (where they
# are missing) and insert an element describing the file itself
#
# Arguments:
#   $1 - name of the compound variable which is the root of the tree
#   $2 - file path; it is split at each '/' into path elements
#   $3 - name of the variable which receives the full variable name of
#        the element created for the file
#
# Returns: always 0
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	pe+=( ${filename} )

	# a path starting with '/' yields an empty first element, which is
	# stored under the name '/' instead
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet (all elements except the last one
	# are directories)
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"
		# test the same variable that is created ("nodes")
		[[ ! -v x.nodes ]] && compound -A x.nodes

		nodepath+=".nodes[${pe[i]}]"
	done

	# insert element; "i" now indexes the last path element, i.e. the
	# name of the file itself
	nameref node="${nodepath}"
	[[ ! -v node.elements ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)

	# hand the full name of the new element back to the caller
	destnodename="${!node}.elements[${pe[i]}]"

	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#   $1 - name of the compound variable which receives the fields
#        "inodenum", "kbblocks", "mode", "numlinks", "owner.user",
#        "owner.group", "filesize", "date" and "filepath"
#   $2 - the line to parse
#
# Returns: 0 on success; 1 (after a message on stderr) if the line does
#          not have the expected format, in which case the output
#          variable is left untouched
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"
	typeset -a field # sub-pattern matches, indices 1..9
	integer n

	# find -ls on Solaris uses the following output format by default:
	#604302    3 -rw-r--r--   1 test001  users        2678 May  9 00:46 ./httpsresdump

	# Match the whole line once (anchored at both ends); the nine
	# parenthesized sub-patterns are then available via ".sh.match".
	if ! [[ "${str}" == ~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+) ]] ; then
		print -u2 -f "parse_findls: unexpected find -ls line format: %s\n" "${str}"
		return 1
	fi

	# copy the matches into a local array right away because
	# ".sh.match" is overwritten by the next pattern operation
	for (( n=1 ; n <= 9 ; n++ )) ; do
		field[n]="${.sh.match[n]}"
	done

	integer out.inodenum="${field[1]}"
	integer out.kbblocks="${field[2]}"
	typeset out.mode="${field[3]}"
	integer out.numlinks="${field[4]}"
	compound out.owner=(
		typeset user="${field[5]}"
		typeset group="${field[6]}"
	)
	integer out.filesize="${field[7]}"
	typeset out.date="${field[8]}"
	typeset out.filepath="${field[9]}"

	return 0
}

#
# usage - let getopts print the usage message, then terminate the
# script with exit code 2
#
# Reads the global variables "progname" and
# "simplefileattributetree1_usage"; does not return.
#
function usage
{
	# the argument handed to getopts instead of the real command line
	typeset -r help_request='-?'

	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT "${help_request}"
	exit 2
}

# main

# shell options: no pathname expansion, and references to unset
# variables are errors
set -o noglob
set -o nounset

# make these two utilities available as shell builtins
builtin basename
builtin dirname

# name of this script, used as prefix for messages
typeset progname="$(basename "${0}")"

# loop counter used by the main loop
integer i

# tree base
compound filetree

# application settings, changed via the command line options below
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)

# benchmark data
compound bench=(
	float start
	float stop
)

# option specification and manual text handed to getopts
typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2009-06-26 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

# process the options; the "+x" form of an option switches the
# corresponding setting off again, anything unknown ends in usage()
while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case "${OPT}" in
		# -b / +b: benchmarking
		b)
			appconfig.do_benchmarking="true"
			;;
		+b)
			appconfig.do_benchmarking="false"
			;;
		# -c / +c: record file content
		c)
			appconfig.do_record.content="true"
			;;
		+c)
			appconfig.do_record.content="false"
			;;
		# -t / +t: record file type
		t)
			appconfig.do_record.filetype="true"
			;;
		+t)
			appconfig.do_record.filetype="false"
			;;
		*)
			usage
			;;
	esac
done

# drop the options which were processed, leaving only the operands
shift $((OPTIND-1))


# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"

# "find -ls" output lines of _all_ <path> arguments; declared once
# outside the loop so that each argument appends to the array instead
# of replacing what the previous arguments produced
typeset -a findls_lines

while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells.
	# IFS is reduced to newline while the output is split so that each
	# output line becomes exactly one array element, and is restored
	# to the default afterwards.
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

# one tree element is created per line of "find -ls" output
for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# parse "find -ls" output; a line the parser rejects is reported
	# and skipped instead of being processed with incomplete data
	if ! parse_findls parseddata "${findls_lines[i]}" ; then
		print -u2 -f "%s: skipping line %d of the find output\n" "${progname}" "$((i+1))"
		continue
	fi

	# add node to tree and return its absolute name in "treenodename"
	if ! add_file_to_tree filetree "${parseddata.filepath}" treenodename ; then
		print -u2 -f "%s: cannot add %s to the tree\n" "${progname}" "${parseddata.filepath}"
		continue
	fi

	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0

			# Each round creates a fresh array element and reads
			# the next block of the file into its "bin" member;
			# the loop ends as soon as "read" fails.
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: "add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			# remove the element created for the failed "read"
			unset treenode.content[${cl}]

			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)

			# we do this for internal debugging only: the hash of
			# the recorded blocks must match the hash of the file
			if [[ "${ {
					integer j
					for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
						printf "%B" treenode.content[$j].bin
					done
				} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

if ${appconfig.do_benchmarking} ; then
	(( bench.stop=SECONDS ))

	# print benchmark data
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.