#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# Make sure all math stuff runs in the "C" locale to avoid problems
# with alternative radix point representations (e.g. ',' instead of
# '.' in de_DE.*-locales). This needs to be set _before_ any
# floating-point constants are defined in this script.
if [[ "${LC_ALL}" != "" ]] ; then
	export \
		LC_MONETARY="${LC_ALL}" \
		LC_MESSAGES="${LC_ALL}" \
		LC_COLLATE="${LC_ALL}" \
		LC_CTYPE="${LC_ALL}"
	unset LC_ALL
fi
export LC_NUMERIC=C


#
# add_file_to_tree - insert a file into the compound variable tree
#
# Arguments:
#   $1 - name of the compound variable which is the root of the tree
#   $2 - file path; it is split at '/' into path elements
#   $3 - name of a variable which receives the full variable name of
#        the newly created "elements[...]" node
#
# Every path element except the last one becomes a level in nested
# "nodes" associative arrays; the last path element becomes a key in
# the "elements" associative array of the innermost node.
#
# Returns: 0
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	pe+=( ${filename} )

	# an absolute path yields an empty first element; name that node '/'
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"
		# test the same variable ("nodes") which is created below
		[[ ! -v x.nodes ]] && compound -A x.nodes

		nodepath+=".nodes[${pe[i]}]"
	done

	# insert element ("i" now indexes the last path element)
	nameref node="${nodepath}"
	[[ ! -v node.elements ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)

	# hand the full name of the new node back to the caller
	destnodename="${!node}.elements[${pe[i]}]"

	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#   $1 - name of the compound variable which receives the fields
#   $2 - one line of "find -ls" output
#
# The following members are set in the output variable:
#   inodenum, kbblocks, mode, numlinks, owner.user, owner.group,
#   filesize, date, filepath
#
# Each member is extracted by applying the same extended regular
# expression to the input line and selecting one of the nine
# backreferences. If the line does not match the pattern the
# substitution leaves the input unchanged.
#
# Returns: 0
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"

	# find -ls on Solaris uses the following output format by default:
	#604302    3 -rw-r--r--   1 test001  users        2678 May  9 00:46 ./httpsresdump

	integer out.inodenum="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\1}"
	integer out.kbblocks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\2}"
	typeset out.mode="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\3}"
	integer out.numlinks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\4}"
	compound out.owner=(
		typeset user="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\5}"
		typeset group="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\6}"
	)
	integer out.filesize="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\7}"
	typeset out.date="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\8}"
	typeset out.filepath="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\9}"

	return 0
}

#
# usage - print the usage message generated by getopts and exit with
# exit code 2
#
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'
	exit 2
}

# main
builtin basename
builtin dirname

set -o noglob
set -o nounset

# tree base
compound filetree

# benchmark data
compound bench=(
	float start
	float stop
)

# application settings; the values are the command names "true"/"false"
# which are executed directly in the conditions below
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

typeset progname="${ basename "${0}" ; }"

typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2009-06-26 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		b)	appconfig.do_benchmarking="true"	;;
		+b)	appconfig.do_benchmarking="false"	;;
		c)	appconfig.do_record.content="true"	;;
		+c)	appconfig.do_record.content="false"	;;
		t)	appconfig.do_record.filetype="true"	;;
		+t)	appconfig.do_record.filetype="false"	;;
		*)	usage ;;
	esac
done
shift $((OPTIND-1))


# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"
# Declare the array once and append to it for each <path> argument so
# that the results of all arguments are kept (not only the last one).
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# parse "find -ls" output
	parse_findls parseddata "${findls_lines[i]}"

	# add node to tree and return its absolute name in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename

	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0
			# each iteration creates the next block node and then tries
			# to fill it via "read -n1024"; the loop ends when "read" fails
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: "add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			# remove the block node created for the final, failed "read"
			unset treenode.content[${cl}]

			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)

			# we do this for internal debugging only
			if [[ "${ {
						integer j
						for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
							printf "%B" treenode.content[$j].bin
						done
					} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

${appconfig.do_benchmarking} && (( bench.stop=SECONDS ))


if ${appconfig.do_benchmarking} ; then
	# print benchmark data
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.