#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

#
# fatal_error - print a message (prefixed with the program name) to
# stderr and terminate the script with exit code 1.
#
# Arguments: $* - message text
#
function fatal_error
{
	print -u 2 "${progname}: $*"
	exit 1
}

#
# attrstrtoattrarray - split a tag's attribute string into an indexed
# array of compound entries.
#
# Arguments:
#   $1 - attribute string, e.g.: foo="bar" baz=1 flag
#   $2 - name of the array variable to fill (bound via nameref)
#
# Each entry receives a "name" member; entries of the form name=value
# additionally receive a "value" member with one pair of surrounding
# single or double quotes removed.
#
function attrstrtoattrarray
{
#set -o xtrace
	typeset s="$1"
	nameref aa=$2 # attribute array
	integer aa_count=0
	typeset nextattr
	integer currattrlen=0
	typeset tagstr
	typeset tagval

	while (( ${#s} > 0 )) ; do
		# skip whitespaces
		# (the scan starts at "currattrlen", i.e. right behind the
		# attribute consumed by the previous iteration)
		while [[ "${s:currattrlen:1}" == ~(E)[[:blank:][:space:]] ]] ; do
			(( currattrlen++ ))
		done
		s="${s:currattrlen:${#s}}"

		# anything left ?
		(( ${#s} == 0 )) && break

		# Pattern tests:
		#x="foo=bar huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=[^[:blank:]\"]*}"
		#x='foo="ba=r o" huz=123' ; print "${x##~(E)[[:alnum:]_-:]*=\"[^\"]*\"}"
		#x="foo='ba=r o' huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=\'[^\"]*\'}"
		#x="foox huz=123" ; print "${x##~(E)[[:alnum:]_-:]*}"
		# All pattern combined via eregex (w|x|y|z):
		#x='foo="bar=o" huz=123' ; print "${x##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\"]*\')}"
		# NOTE(review): the single-quoted alternative excludes '"' rather
		# than "'" inside the value, and "_-:" is a reversed range; both
		# look unintended but are left as-is - confirm before changing.
		nextattr="${s##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\"]*\'|[[:alnum:]_-:]*)}"
		currattrlen=$(( ${#s} - ${#nextattr}))

		# No progress means the next character cannot start an attribute
		# (for example the trailing "?" of <?xml version="1.0"?>).
		# Skip that character; otherwise the loop would keep adding an
		# empty entry for the same input until the assert below aborts
		# the whole program.
		if (( currattrlen == 0 )) ; then
			currattrlen=1
			continue
		fi

		# add entry
		tagstr="${s:0:currattrlen}"
		if [[ "${tagstr}" == *=* ]] ; then
			# normal case: attribute with value

			tagval="${tagstr#*=}"

			# strip quotes ('' or "")
			if [[ "${tagval}" == ~(Elr)(\'.*\'|\".*\") ]] ; then
				tagval="${tagval:1:${#tagval}-2}"
			fi

			aa[${aa_count}]=( name="${tagstr%%=*}" value="${tagval}" )
		else
			# special case for HTML where you have something like <foo baz>
			aa[${aa_count}]=( name="${tagstr}" )
		fi
		(( aa_count++ ))
		(( aa_count > 1000 )) && fatal_error "$0: aa_count too large" # assert
	done
}


#
# handle_document - xml_tok callback which builds the document tree.
#
# Arguments:
#   $1 - name of the callback array (its "arg_tree" entry names the
#        tree variable)
#   $2 - event type (tag_begin, tag_end, tag_text, tag_comment,
#        document_start, document_end)
#   $3 - tag name or text/comment content
#   $4 - raw attribute string (only passed for tag_begin)
#
# Uses the global compound variable "stack": stack.items[stack.pos]
# holds the variable path of the node list currently being filled and
# the same path with "num" appended names its element counter.
# The paths start with "doc", which is why the local nameref "doc" must
# exist even though it is not referenced directly in this function.
#
function handle_document
{
#set -o xtrace
	nameref callbacks=${1}
	typeset tag_type="${2}"
	typeset tag_value="${3}"
	typeset tag_attributes="${4}"
	nameref doc=${callbacks["arg_tree"]}
	nameref nodepath="${stack.items[stack.pos]}"
	nameref nodesnum="${stack.items[stack.pos]}num"

	case "${tag_type}" in
		tag_begin)
			nodepath[${nodesnum}]+=(
				typeset tagtype="element"
				typeset tagname="${tag_value}"
				compound -A tagattributes
				compound -A nodes
				integer nodesnum=0
			)

			# fill attributes
			if [[ "${tag_attributes}" != "" ]] ; then
				attrstrtoattrarray "${tag_attributes}" "nodepath[${nodesnum}].tagattributes"
			fi

			# descend: following events are stored in the child list
			# of the element which was just created
			(( stack.pos++ ))
			stack.items[stack.pos]="${stack.items[stack.pos-1]}[${nodesnum}].nodes"
			(( nodesnum++ ))
			;;
		tag_end)
			# ascend to the parent's node list
			(( stack.pos-- ))
			;;
		tag_text)
			nodepath[${nodesnum}]+=(
				typeset tagtype="text"
				typeset tagvalue="${tag_value}"
			)
			(( nodesnum++ ))
			;;
		tag_comment)
			nodepath[${nodesnum}]+=(
				typeset tagtype="comment"
				typeset tagvalue="${tag_value}"
			)
			(( nodesnum++ ))
			;;
		document_start)
			;;
		document_end)
			;;
	esac

#	print "xmltok: '${tag_type}' = '${tag_value}'"
}

#
# xml_tok - minimal XML/HTML tokenizer.
#
# Reads the document from stdin one character at a time and invokes
# the functions registered in the callback array for the events
# document_start, tag_begin, tag_end, tag_text, tag_comment and
# document_end.  Events without a registered callback are skipped.
#
# Arguments:
#   $1 - name of an associative array mapping event names to
#        function names
#
# Each callback is called as:
#   <function> <$1> <event> [<value> [<attribute string>]]
# (document_end receives "exit_success" as value).
# A final newline is written to stdout after the document was read.
#
function xml_tok
{
	typeset buf=""
	typeset namebuf=""
	typeset attrbuf=""
	typeset c=""
	typeset isendtag # bool: true/false
	typeset issingletag # bool: true/false (used for tags like "<br />")
	nameref callbacks=${1}

	[[ -n "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"

	while IFS='' read -r -N 1 c ; do
		isendtag=false

		if [[ "$c" == "<" ]] ; then
			# flush any text content
			if [[ "$buf" != "" ]] ; then
				[[ -n "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
				buf=""
			fi

			# a "/" directly behind "<" marks an end tag
			IFS='' read -r -N 1 c
			if [[ "$c" == "/" ]] ; then
				isendtag=true
			else
				buf="$c"
			fi
			# read the remainder of the tag (up to, excluding ">")
			IFS='' read -r -d '>' c
			buf+="$c"

			# handle comments
			if [[ "$buf" == ~(El)!-- ]] ; then
				# did we read the comment completely ?
				if [[ "$buf" != ~(Elr)!--.*-- ]] ; then
					# the ">" consumed above was part of the comment
					# text; put it back and read on until "--"
					buf+=">"
					while [[ "$buf" != ~(Elr)!--.*-- ]] ; do
						IFS='' read -r -N 1 c || break
						buf+="$c"
					done
				fi

				# pass the text between "!--" and "--"
				[[ -n "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
				buf=""
				continue
			fi

			# check if the tag starts and ends at the same time (like "<br />")
			if [[ "${buf}" == ~(Er).*/ ]] ; then
				issingletag=true
				buf="${buf%*/}"
			else
				issingletag=false
			fi

			# check if the tag has attributes (e.g. space after name)
			if [[ "$buf" == ~(E)[[:space:][:blank:]] ]] ; then
				namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
				attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
			else
				namebuf="$buf"
				attrbuf=""
			fi

			if ${isendtag} ; then
				[[ -n "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
			else
				[[ -n "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"

				# handle tags like <br/> (which are start- and end-tag in one piece)
				if ${issingletag} ; then
					[[ -n "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
				fi
			fi
			buf=""
		else
			buf+="$c"
		fi
	done

	[[ -n "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"

	print # final newline to make filters like "sed" happy
}

#
# print_sample1_xml - write the built-in sample document (selected via
# the file name "#sample1") to stdout.
#
function print_sample1_xml
{
cat <<EOF
<br />
<score-partwise instrument="flute1">
	<identification>
		<kaiman>nocrocodile</kaiman>
	</identification>
	<!-- a comment -->
	<partlist>
		<foo>myfootext</foo>
		<bar>mybartext</bar>
		<snap />
		<!-- another
			comment -->
		<ttt>myttttext</ttt>
	</partlist>
</score-partwise>
EOF
}

#
# usage - let getopts print the usage message generated from
# ${xmldocumenttree1_usage}, then exit with code 2.
#
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${xmldocumenttree1_usage}" OPT '-?'
	exit 2
}

# program start
builtin basename
builtin cat
builtin date
builtin uname

typeset progname="${ basename "${0}" ; }"

typeset -r xmldocumenttree1_usage=$'+
[-?\n@(#)\$Id: xmldocumenttree1 (Roland Mainz) 2009-05-09 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?xmldocumenttree1 - XML tree demo]
[+DESCRIPTION?\bxmldocumenttree\b is a small ksh93 compound variable demo
	which reads a XML input file, converts it into an internal
	variable tree representation and outputs it in the format
	specified by viewmode (either "list", "namelist", "tree" or "compacttree").]

file viewmode

[+SEE ALSO?\bksh93\b(1)]
'

# the usage string defines no options of its own; anything that reaches
# the loop body is answered with the usage message
while getopts -a "${progname}" "${xmldocumenttree1_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		*) usage ;;
	esac
done
shift $((OPTIND-1))

typeset xmlfile="$1"
typeset viewmode="$2"

if [[ "${xmlfile}" == "" ]] ; then
	fatal_error $"No file given."
fi

if [[ "${viewmode}" != ~(Elr)(list|namelist|tree|compacttree) ]] ; then
	fatal_error $"Invalid view mode \"${viewmode}\"."
fi

# document tree filled by handle_document
compound xdoc
compound -A xdoc.nodes
integer xdoc.nodesnum=0

# path stack used by handle_document; "doc" is the nameref which
# handle_document binds to the tree variable named by "arg_tree"
compound stack
typeset -a stack.items=( [0]="doc.nodes" )
integer stack.pos=0

# setup callbacks for xml_tok
typeset -A document_cb # callbacks for xml_tok
document_cb["document_start"]="handle_document"
document_cb["document_end"]="handle_document"
document_cb["tag_begin"]="handle_document"
document_cb["tag_end"]="handle_document"
document_cb["tag_text"]="handle_document"
document_cb["tag_comment"]="handle_document"
# argument for "handle_document"
document_cb["arg_tree"]="xdoc"


if [[ "${xmlfile}" == "#sample1" ]] ; then
	print_sample1_xml | xml_tok document_cb
elif [[ "${xmlfile}" == "#sample2" ]] ; then
	/usr/sfw/bin/wget \
		--user-agent='ksh93_xmldocumenttree' \
		--output-document=- \
		'http://www.google.com/custom?q=gummi+bears' |
		/usr/bin/iconv -f "ISO8859-1" |
		xml_tok document_cb
else
	# fail early instead of printing an empty tree and exiting with 0
	if [[ ! -r "${xmlfile}" ]] ; then
		fatal_error $"Cannot read file \"${xmlfile}\"."
	fi
	xml_tok document_cb <"${xmlfile}"
fi

print -u2 "#parsing completed."

case "${viewmode}" in
	list)
		set | egrep "xdoc.*(tagname|tagtype|tagval|tagattributes)" | fgrep -v ']=$'
		;;
	namelist)
		typeset + | egrep "xdoc.*(tagname|tagtype|tagval|tagattributes)"
		;;
	tree)
		print -v xdoc
		;;
	compacttree)
		print -C xdoc
		;;
	*)
		# not reachable after the check above; kept as a safety net
		fatal_error $"Invalid view mode \"${viewmode}\"."
		;;
esac

print -u2 "#done."

exit 0
# EOF.