#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# Make sure all math stuff runs in the "C" locale to avoid problems
# with alternative radix point representations (e.g. ',' instead of
# '.' in de_DE.*-locales). This needs to be set _before_ any
# floating-point constants are defined in this script.
if [[ "${LC_ALL}" != "" ]] ; then
	export \
		LC_MONETARY="${LC_ALL}" \
		LC_MESSAGES="${LC_ALL}" \
		LC_COLLATE="${LC_ALL}" \
		LC_CTYPE="${LC_ALL}"
	unset LC_ALL
fi
export LC_NUMERIC=C


#
# add_file_to_tree - insert a leaf node for one file into a compound
# variable tree
#
# Arguments:
#   $1 - name of the compound variable which is the root of the tree
#   $2 - path of the file; it is split at '/' into path elements
#   $3 - name of a variable which receives the full variable name of
#        the newly created leaf node
#
# Every path element except the last one becomes an entry in an
# associative compound array called "nodes"; the last path element
# becomes an entry in the associative compound array "elements" of the
# innermost node. The new leaf contains a "filepath" member set to $2.
#
# Returns: always 0
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	# ("filename" is intentionally unquoted so that it gets split at
	# the function-local IFS)
	typeset IFS='/'
	pe+=( ${filename} )

	# an absolute path yields an empty first element; name it "/"
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"

		# [[ -v ]] does not work for arrays because [[ -v ar ]]
		# is equal to [[ -v ar[0] ]]. In this case we can
		# use the output of typeset +p x.nodes
		[[ "${ typeset +p x.nodes ; }" == "" ]] && compound -A x.nodes

		nodepath+=".nodes[${pe[i]}]"
	done

	# insert element ("i" now indexes the last path element)
	nameref node="${nodepath}"
	[[ "${ typeset +p node.elements ; }" == "" ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)

	# hand the full variable name of the new leaf back to the caller
	destnodename="${!node}.elements[${pe[i]}]"

	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#   $1 - name of the compound variable which receives the fields
#   $2 - one line of "find -ls" output
#
# The following members are created in the variable named by $1:
# inodenum, kbblocks, mode, numlinks, owner.user, owner.group,
# filesize, date and filepath. Each member is extracted by applying
# the same extended regular expression to the whole line and selecting
# one of its nine sub-expressions.
#
# NOTE(review): user and group are matched with [[:alnum:]]+ only, so
# names containing other characters (e.g. '_' or '-') will presumably
# not match; a non-matching line leaves the substitution result equal
# to the whole input line - confirm whether callers need a check.
#
# Returns: always 0
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"

	# find -ls on Solaris uses the following output format by default:
	#604302 3 -rw-r--r-- 1 test001 users 2678 May 9 00:46 ./httpsresdump

	integer out.inodenum="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\1}"
	integer out.kbblocks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\2}"
	typeset out.mode="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\3}"
	integer out.numlinks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\4}"
	compound out.owner=(
		typeset user="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\5}"
		typeset group="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\6}"
	)
	integer out.filesize="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\7}"
	typeset out.date="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\8}"
	typeset out.filepath="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\9}"

	return 0
}

#
# usage - let getopts print the usage message generated from
# "simplefileattributetree1_usage", then exit with status 2
#
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'
	exit 2
}

# main
builtin basename
builtin dirname

set -o noglob
set -o nounset

# tree base
compound filetree

# benchmark data
compound bench=(
	float start
	float stop
)

# application settings; the boolean members hold the command names
# "true"/"false" and are executed directly in the tests below
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

typeset progname="${ basename "${0}" ; }"

typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2010-03-27 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		b)	appconfig.do_benchmarking="true"	;;
		+b)	appconfig.do_benchmarking="false"	;;
		c)	appconfig.do_record.content="true"	;;
		+c)	appconfig.do_record.content="false"	;;
		t)	appconfig.do_record.filetype="true"	;;
		+t)	appconfig.do_record.filetype="false"	;;
		*)	usage ;;
	esac
done
shift $((OPTIND-1))


# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"
# The array is declared once and appended to for every <path> argument
# so that the lines collected for earlier arguments are kept.
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# parse "find -ls" output
	parse_findls parseddata "${findls_lines[i]}"

	# add node to tree and return its absolute name in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename

	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0
			# a new element is created before each read attempt ...
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: "add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			# ... so the element created for the final, failed
			# read has to be removed again
			unset treenode.content[${cl}]

			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)

			# we do this for internal debugging only: the hash
			# of the recorded blocks must match the hash of
			# the file itself
			if [[ "${ {
				integer j
				for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
					printf "%B" treenode.content[$j].bin
				done
			} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

${appconfig.do_benchmarking} && (( bench.stop=SECONDS ))


if ${appconfig.do_benchmarking} ; then
	# print benchmark data
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.