xref: /titanic_50/usr/src/lib/libshell/common/scripts/simplefileattributetree1.sh (revision 3e14f97f673e8a630f076077de35afdd43dc1587)
#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# Make sure all math stuff runs in the "C" locale to avoid problems
# with alternative radix point representations (e.g. ',' instead of
# '.' in de_DE.*-locales). This needs to be set _before_ any
# floating-point constants are defined in this script.
#
# LC_ALL overrides every individual LC_* category, so LC_NUMERIC=C
# would have no effect while LC_ALL is set. Therefore the value of
# LC_ALL is first copied into the other categories (including LC_TIME,
# so that the caller's time locale is not silently dropped) and LC_ALL
# itself is removed afterwards.
if [[ -n "${LC_ALL:-}" ]] ; then
    export \
        LC_MONETARY="${LC_ALL}" \
        LC_MESSAGES="${LC_ALL}" \
        LC_COLLATE="${LC_ALL}" \
        LC_CTYPE="${LC_ALL}" \
        LC_TIME="${LC_ALL}"
    unset LC_ALL
fi
export LC_NUMERIC=C


#
# add_file_to_tree - insert a file into a tree of nested compound variables
#
# Arguments:
#	$1	name of the compound variable which is the root of the tree
#	$2	path of the file to insert
#	$3	name of the variable which receives the full variable
#		name of the newly created tree element
#
# Each directory component of $2 becomes a key in the "nodes"
# associative array of its parent node; the last path component becomes
# a key in the "elements" associative array of the innermost node and
# is initialised with "filepath" set to $2.
#
# The path is split via an unquoted expansion, i.e. the caller is
# expected to run with "noglob" enabled (this script sets it globally).
#
# Returns 0.
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	pe+=( ${filename} )

	# a path starting with '/' yields an empty first element; store
	# it under the key "/" instead
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet (the last element of "pe" is the
	# file itself and is handled below)
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"

		# [[ -v ]] does not work for arrays because [[ -v ar ]]
		# is equal to [[ -v ar[0] ]]. In this case we can
		# use the output of typeset +p x.nodes
		[[ "${ typeset +p x.nodes ; }" == "" ]] && compound -A x.nodes

		nodepath+=".nodes[${pe[i]}]"
	done

	# insert element ("i" now indexes the last path element)
	nameref node="${nodepath}"
	[[ "${ typeset +p node.elements ; }" == "" ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)

	# "${!node}" is the name of the variable "node" refers to
	destnodename="${!node}.elements[${pe[i]}]"

	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#	$1	name of the compound variable which receives the fields
#	$2	one line of "find -ls" output
#
# On success the following members are created in the variable named
# by $1: inodenum, kbblocks, numlinks, filesize (integers), mode, date,
# filepath (strings) and the compound "owner" with the strings "user"
# and "group".
#
# Returns 0 on success; prints a diagnostic to stderr and returns 1 if
# the line does not have the expected format.
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"

	# find -ls on Solaris uses the following output format by default:
	#604302    3 -rw-r--r--   1 test001  users        2678 May  9 00:46 ./httpsresdump

	# Match the whole line once ("~(Elr)" selects an extended regular
	# expression anchored at the left and right end) and pick up the
	# sub-pattern matches from ".sh.match" instead of re-running the
	# same expression for every field.
	if [[ "${str}" == ~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+) ]] ; then
		integer out.inodenum="${.sh.match[1]}"
		integer out.kbblocks="${.sh.match[2]}"
		typeset out.mode="${.sh.match[3]}"
		integer out.numlinks="${.sh.match[4]}"
		compound out.owner=(
			typeset user="${.sh.match[5]}"
			typeset group="${.sh.match[6]}"
		)
		integer out.filesize="${.sh.match[7]}"
		typeset out.date="${.sh.match[8]}"
		typeset out.filepath="${.sh.match[9]}"

		return 0
	fi

	print -u2 -f "parse_findls: unexpected \"find -ls\" line: %s\n" "${str}"
	return 1
}

#
# usage - print the usage message and terminate the script
#
# Uses the global variables "progname" and
# "simplefileattributetree1_usage". Never returns; exits with status 2.
#
function usage
{
	# restart option processing and hand getopts a literal '-?' as
	# the only argument; with the long-form specification this is
	# what makes ksh93's getopts emit the usage text
	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'
	exit 2
}

# main

# load basename/dirname as shell builtins (both are called once per
# file while the tree is built)
builtin basename
builtin dirname

# noglob: file names are split via unquoted expansions and must not be
# subject to pathname expansion
# nounset: treat references to unset variables as errors
set -o noglob
set -o nounset

# tree base
compound filetree

# benchmark data (values of ${SECONDS} before/after building the tree)
compound bench=(
	float start
	float stop
)

# application configuration; the values are the command names
# "true"/"false" and are executed directly as conditions
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

typeset progname="${ basename "${0}" ; }"

# Option specification and manual page text in ksh93's long-form
# getopts syntax; used for option parsing and by usage().
# NOTE(review): the DESCRIPTION below says the tree is built from the
# output of /usr/xpg4/bin/file, while the script reads the file list
# from "find ... -type f -ls" - confirm which wording is intended.
typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2010-03-27 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

# Process the command line options; "-x" switches a feature on and
# "+x" switches it off again. Anything else prints the usage message
# and exits via usage().
while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		b)	appconfig.do_benchmarking="true"	;;
		+b)	appconfig.do_benchmarking="false"	;;
		c)	appconfig.do_record.content="true"	;;
		+c)	appconfig.do_record.content="false"	;;
		t)	appconfig.do_record.filetype="true"	;;
		+t)	appconfig.do_record.filetype="false"	;;
		*)	usage ;;
	esac
done
# drop the options which were processed; only <path> arguments remain
shift $((OPTIND-1))


# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"

# Collect the "find -ls" output lines for _all_ <path> arguments. The
# array is declared once and appended to for each argument; assigning
# it inside the loop would keep only the lines of the last <path>.
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells
	# IFS is set to newline so that each output line becomes one array element
	IFS=$'\n'
	findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) )
	IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

# Build one tree element per line collected in "findls_lines".
for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# parse "find -ls" output; skip the line if the parser reports
	# a failure instead of working on incomplete data
	parse_findls parseddata "${findls_lines[i]}" || continue

	# add node to tree and return its absolute name in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename

	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0
			# The loop condition creates the next block and tries
			# to fill it with up to 1024 bytes; it ends when
			# "read" fails.
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			# remove the block which was created for the failed "read"
			unset treenode.content[${cl}]

			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)

			# we do this for internal debugging only: the hash of
			# the recorded blocks must match the hash of the file
			if [[ "${ {
					integer j
					for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
						printf "%B" treenode.content[$j].bin
					done
				} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

${appconfig.do_benchmarking} && (( bench.stop=SECONDS ))


if ${appconfig.do_benchmarking} ; then
	# print benchmark data (difference of the two ${SECONDS} samples
	# taken around the tree building loop)
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.