#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# Make sure all math stuff runs in the "C" locale to avoid problems
# with alternative # radix point representations (e.g. ',' instead of
# '.' in de_DE.*-locales). This needs to be set _before_ any
# floating-point constants are defined in this script).
if [[ "${LC_ALL}" != "" ]] ; then
	export \
		LC_MONETARY="${LC_ALL}" \
		LC_MESSAGES="${LC_ALL}" \
		LC_COLLATE="${LC_ALL}" \
		LC_CTYPE="${LC_ALL}"
	unset LC_ALL
fi
export LC_NUMERIC=C


#
# add_file_to_tree - create the tree node for one file path
#
# Arguments:
#   $1 - name of the compound variable which is the root of the tree
#   $2 - file path; it is split at '/' into path elements
#   $3 - name of a variable which receives the full variable name of
#        the newly created "elements[...]" node
#
# Every path element except the last one becomes a ".nodes[<element>]"
# level below the root; the last element becomes an entry in the
# ".elements" associative array of the innermost node and is initialised
# with "filepath" set to $2.
# Always returns 0.
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	pe+=( ${filename} )

	# an absolute path yields an empty first element; name that node '/'
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"

		# [[ -v ]] does not work for arrays because [[ -v ar ]]
		# is equal to [[ -v ar[0] ]]. In this case we can
		# use the output of typeset +p x.nodes
		[[ "${ typeset +p x.nodes ; }" == "" ]] && compound -A x.nodes

		nodepath+=".nodes[${pe[i]}]"
	done

	# insert element ("i" now indexes the last path element)
	nameref node="${nodepath}"
	[[ "${ typeset +p node.elements ; }" == "" ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)

	# hand the absolute variable name of the new node back to the caller
	destnodename="${!node}.elements[${pe[i]}]"

	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#   $1 - name of the compound variable which receives the fields
#   $2 - one line of "find -ls" output
#
# The following members are set in the output variable:
#   inodenum, kbblocks, mode, numlinks, owner.user, owner.group,
#   filesize, date, filepath
#
# Each member is extracted by matching the complete line against the same
# extended regular expression and substituting one of the nine groups.
# Note that the user and group fields are matched with [[:alnum:]]+ only.
# Always returns 0.
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"

	# find -ls on Solaris uses the following output format by default:
	#604302 3 -rw-r--r-- 1 test001 users 2678 May 9 00:46 ./httpsresdump

	integer out.inodenum="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\1}"
	integer out.kbblocks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\2}"
	typeset out.mode="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\3}"
	integer out.numlinks="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\4}"
	compound out.owner=(
		typeset user="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\5}"
		typeset group="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\6}"
	)
	integer out.filesize="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\7}"
	typeset out.date="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\8}"
	typeset out.filepath="${str/~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]-]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:alnum:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+)/\9}"

	return 0
}

#
# usage - let getopts print the usage message built from
# "simplefileattributetree1_usage", then exit with status 2
#
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'
	exit 2
}

# main
builtin basename
builtin dirname

set -o noglob
set -o nounset

# tree base
compound filetree

# benchmark data
compound bench=(
	float start
	float stop
)

# settings controlled by the command line options ("true"/"false" are
# executed as commands later on)
compound appconfig=(
	typeset do_benchmarking=false
	compound do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

typeset progname="${ basename "${0}" ; }"

typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2010-03-27 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree
	demo which builds a compound variable tree based on the output
	of /usr/xpg4/bin/file which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		b)	appconfig.do_benchmarking="true"	;;
		+b)	appconfig.do_benchmarking="false"	;;
		c)	appconfig.do_record.content="true"	;;
		+c)	appconfig.do_record.content="false"	;;
		t)	appconfig.do_record.filetype="true"	;;
		+t)	appconfig.do_record.filetype="false"	;;
		*)	usage ;;
	esac
done
shift $((OPTIND-1))


# argument prechecks
if (( $# == 0 )) ; then
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
fi


print -u2 -f "# reading file names...\n"
# The array is declared once and then appended to, so that the files of
# every <path> argument end up in the tree (re-assigning the array in each
# iteration would only keep the output for the last argument).
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename

	# parse "find -ls" output
	parse_findls parseddata "${findls_lines[i]}"

	# add node to tree and return its absolute name in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename

	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata

	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"

	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi

	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0
			# the block element is created before each read, so the
			# loop leaves one surplus element behind when read fails
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: "add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			# remove the surplus element created for the failed read
			unset treenode.content[${cl}]

			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)

			# we do this for internal debugging only
			if [[ "${ {
				integer j
				for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
					printf "%B" treenode.content[$j].bin
				done
				} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

${appconfig.do_benchmarking} && (( bench.stop=SECONDS ))


if ${appconfig.do_benchmarking} ; then
	# print benchmark data
	print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))
fi

# print variable tree
print -v filetree

exit 0
# EOF.