xref: /titanic_51/usr/src/cmd/ast/libshell/common/scripts/rssread.sh (revision b4dd7d09880f14016feece03929a224eca1cf39a)
1*b4dd7d09SAndy Fiddaman#!/usr/bin/ksh93
2*b4dd7d09SAndy Fiddaman
3*b4dd7d09SAndy Fiddaman#
4*b4dd7d09SAndy Fiddaman# CDDL HEADER START
5*b4dd7d09SAndy Fiddaman#
6*b4dd7d09SAndy Fiddaman# The contents of this file are subject to the terms of the
7*b4dd7d09SAndy Fiddaman# Common Development and Distribution License (the "License").
8*b4dd7d09SAndy Fiddaman# You may not use this file except in compliance with the License.
9*b4dd7d09SAndy Fiddaman#
10*b4dd7d09SAndy Fiddaman# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11*b4dd7d09SAndy Fiddaman# or http://www.opensolaris.org/os/licensing.
12*b4dd7d09SAndy Fiddaman# See the License for the specific language governing permissions
13*b4dd7d09SAndy Fiddaman# and limitations under the License.
14*b4dd7d09SAndy Fiddaman#
15*b4dd7d09SAndy Fiddaman# When distributing Covered Code, include this CDDL HEADER in each
16*b4dd7d09SAndy Fiddaman# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17*b4dd7d09SAndy Fiddaman# If applicable, add the following below this CDDL HEADER, with the
18*b4dd7d09SAndy Fiddaman# fields enclosed by brackets "[]" replaced with your own identifying
19*b4dd7d09SAndy Fiddaman# information: Portions Copyright [yyyy] [name of copyright owner]
20*b4dd7d09SAndy Fiddaman#
21*b4dd7d09SAndy Fiddaman# CDDL HEADER END
22*b4dd7d09SAndy Fiddaman#
23*b4dd7d09SAndy Fiddaman
24*b4dd7d09SAndy Fiddaman#
25*b4dd7d09SAndy Fiddaman# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
26*b4dd7d09SAndy Fiddaman#
27*b4dd7d09SAndy Fiddaman
28*b4dd7d09SAndy Fiddaman#
29*b4dd7d09SAndy Fiddaman# rssread - a simple RSS2.0 reader with RSS to XHTML to
30*b4dd7d09SAndy Fiddaman# plaintext conversion.
31*b4dd7d09SAndy Fiddaman#
32*b4dd7d09SAndy Fiddaman
# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin in front of /usr/bin because the
# tools in /usr/bin are not POSIX-conformant.
PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin
export PATH
35*b4dd7d09SAndy Fiddaman
# printmsg - write all arguments, joined into a single line, to stderr.
function printmsg
{
	print "$*" >&2
}
40*b4dd7d09SAndy Fiddaman
# debugmsg - debugging hook. Output is currently disabled: the function
# prints nothing and always succeeds.
function debugmsg
{
	# uncomment to forward debug messages to stderr:
#	printmsg "$*"
	return 0
}
46*b4dd7d09SAndy Fiddaman
# fatal_error - print "<progname>: <message>" to stderr and terminate the
# script with exit status 1.
function fatal_error
{
	print "${progname}: $*" >&2
	exit 1
}
52*b4dd7d09SAndy Fiddaman
#
# urlconnection_t - ksh93 type that fetches the document behind a
# file://, http:// or https:// URL.
#
# Usage:
#	urlconnection_t hc
#	hc.init_url "http://host[:port]/path"
#	hc.cat_url		# writes the document body to stdout
#
typeset -T urlconnection_t=(
	# public
	typeset user_agent="ksh93/urlconnection_t"

	# private variables
	typeset protocol	# URL scheme (text in front of "://")
	typeset path1		# URL with the "<scheme>://" prefix removed
	typeset host		# host part; carries ":port" until cat_url splits it off
	typeset path		# path after the first "/" (no leading slash, may be empty)
	typeset port		# TCP port

	compound netfd=(
		integer in=-1  # incoming traffic
		integer out=-1 # outgoing traffic
	)

	# only used for https
	compound ssl=(
		compound fifo=(
			typeset dir=""
			typeset in=""
			typeset out=""
		)
		integer openssl_client_pid=-1
	)

	# parse_http_response <varname>
	# Read the HTTP status line and the headers from stdin and store
	# statuscode, statusmsg and (if present) content_type,
	# content_length and transfer_encoding in the compound variable
	# named by $1. Returns 1 if the status line is malformed.
	function parse_http_response
	{
		nameref response="$1"
		typeset h statuscode statusmsg i

		# we use '\r' as additional IFS to filter the final '\r'
		IFS=$' \t\r' read -r h statuscode statusmsg # read HTTP/1.[01] <code>
		[[ "$h" != ~(Eil)HTTP/.* ]]         && { print -u2 -f $"%s: HTTP/ header missing\n" "$0" ; return 1 ; }
		# "+" (not "*"): an empty status code is invalid, too
		[[ "$statuscode" != ~(Elr)[0-9]+ ]] && { print -u2 -f $"%s: invalid status code\n"  "$0" ; return 1 ; }
		response.statuscode="$statuscode"
		response.statusmsg="$statusmsg"

		# skip remaining headers
		while IFS='' read -r i ; do
			# a line consisting only of '\r' terminates the header section
			[[ "$i" == $'\r' ]] && break

			# strip '\r' at the end
			i="${i/~(Er)$'\r'/}"

			case "$i" in
				~(Eli)Content-Type:.*)
					response.content_type="${i/~(El).*:[[:blank:]]*/}"
					;;
				~(Eli)Content-Length:[[:blank:]]*[0-9]*)
					integer response.content_length="${i/~(El).*:[[:blank:]]*/}"
					;;
				~(Eli)Transfer-Encoding:.*)
					response.transfer_encoding="${i/~(El).*:[[:blank:]]*/}"
					;;
			esac
		done

		return 0
	}

	# cat_http_body <transfer-encoding>
	# Copy the HTTP body from stdin to stdout. If $1 is "chunked" the
	# chunk framing is removed; otherwise stdin is copied verbatim.
	function cat_http_body
	{
		typeset emode="$1"
		typeset hexchunksize="0"
		integer chunksize=0

		if [[ "${emode}" == "chunked" ]] ; then
			while IFS=$'\n' read -r hexchunksize ; do
				hexchunksize="${hexchunksize//$'\r'/}"
				# drop optional chunk extensions ("<size>;name=value")
				hexchunksize="${hexchunksize%%;*}"
				# empty lines are the CRLF which follows the chunk data
				[[ "${hexchunksize}" != "" ]] || continue
				[[ "${hexchunksize}" == ~(Elri)[0-9abcdef]+ ]] || break
				chunksize=$(( 16#${hexchunksize} ))
				# a chunk of size 0 marks the end of the body
				(( chunksize > 0 )) || break
				dd bs=1 count="${chunksize}" 2>/dev/null
			done
		else
			cat
		fi

		return 0
	}

	# init_url <url>
	# Split the URL into protocol, host (still including an optional
	# ":port") and path. If the host carries no ":port" then "port" is
	# set to the host string; cat_url uses that to detect a missing port.
	function init_url
	{
		# "%%" - cut at the FIRST "://" so that a URL embedded in the
		# query string can not confuse the protocol detection
		_.protocol="${1%%://*}"
		_.path1="${1#*://}" # "http://foo.bat.net/x/y.html" ----> "foo.bat.net/x/y.html"

		if  [[ "${_.protocol}" == ~(Elr)http(|s) ]] ; then
			_.host="${_.path1%%/*}"
			if [[ "${_.path1}" == */* ]] ; then
				_.path="${_.path1#*/}"
			else
				# "http://host" without any path: request the root document
				_.path=""
			fi
			_.port="${_.host##*:}"
		fi

		return 0
	}

	# close connection
	# Closes the network descriptors, waits for the openssl helper (https
	# only) and removes its FIFO directory. Returns 1 if openssl failed.
	function close_connection
	{
		typeset shared_fd=false # bool: true/false
		integer openssl_status=0

		# http uses one descriptor for both directions - remember that
		# before netfd.in is reset so that it does not get closed twice
		if (( _.netfd.in != -1 && _.netfd.in == _.netfd.out )) ; then
			shared_fd=true
		fi

		if (( _.netfd.in != -1 )) ; then
			redirect {_.netfd.in}<&-
			(( _.netfd.in=-1 ))
		fi

		if (( _.netfd.out != -1 )) ; then
			if ! ${shared_fd} ; then
				redirect {_.netfd.out}<&-
			fi
			((  _.netfd.out=-1 ))
		fi

		if [[ "${_.protocol}" == "https" ]] ; then
			wait ${_.ssl.openssl_client_pid} || openssl_status=$?
			(( _.ssl.openssl_client_pid=-1 ))

			# remove the FIFO directory even if openssl failed
			if [[ "${_.ssl.fifo.dir}" != "" ]] ; then
				rm -r "${_.ssl.fifo.dir}"
				_.ssl.fifo.dir=""
			fi

			if (( openssl_status != 0 )) ; then
				print -u2 -f "%s: openssl failed.\n" "$0"
				return 1
			fi
		fi

		return 0
	}

	# open_connection
	# http:  open /dev/tcp/<host>/<port> and use the descriptor for both
	#        directions.
	# https: create two FIFOs in a temporary directory and start an
	#        "openssl s_client" child connected to them.
	function open_connection
	{
		if [[ "${_.protocol}" == "https" ]] ; then
			_.ssl.fifo.dir="$(mktemp -t -d)"
			if [[ "${_.ssl.fifo.dir}" == "" ]] ; then
				print -u2 -f "%s: Could not create temporary directory\n" "$0"
				return 1
			fi
			_.ssl.fifo.in="${_.ssl.fifo.dir}/in"
			_.ssl.fifo.out="${_.ssl.fifo.dir}/out"

			# Use "errexit" to leave it at the first error
			# (this saves lots of if/fi tests for error checking)
			set -o errexit

			mkfifo "${_.ssl.fifo.in}" "${_.ssl.fifo.out}"

			# create async openssl child to handle https
			openssl s_client -quiet -connect "${_.host}:${_.port}" <"${_.ssl.fifo.in}" >>"${_.ssl.fifo.out}" &

			_.ssl.openssl_client_pid=$!
		else
			redirect {_.netfd.in}<> "/dev/tcp/${_.host}/${_.port}"
			(( $? != 0 )) && { print -u2 -f "%s: Could not open %s\n" "$0" "${_.host}:${_.port}" ; return 1 ; }
			(( _.netfd.out=_.netfd.in ))
		fi
		return 0
	}

	# send_request <request>
	# Send the request text (plus a trailing "\r\n") to the server. For
	# https the read side of the connection is opened here, too.
	function send_request
	{
		typeset request="$1"

		set -o errexit

		if [[ "${_.protocol}" == "https" ]] ; then
			# "print" without -r expands the "\r\n" sequences
			print -n -- "${request}\r\n" >>	"${_.ssl.fifo.in}"

			redirect {_.netfd.in}< "${_.ssl.fifo.out}"
		else
			print -n -- "${request}\r\n" >&${_.netfd.out}
		fi
		return 0
	}

	# cat_url
	# Write the document behind the URL given to init_url to stdout.
	# Returns 1 for unsupported protocols, incomplete URLs or if the
	# connection could not be opened.
	function cat_url
	{
		typeset request

		if [[ "${_.protocol}" == "file" ]] ; then
			cat "${_.path1}"
			return $?
		elif [[ "${_.protocol}" == ~(Elr)http(|s) ]] ; then
			compound httpresponse # http response

			# If URL did not contain a port number in the host part then look at the
			# protocol to get the port number
			if [[ "${_.port}" == "${_.host}" ]] ; then
				case "${_.protocol}" in
					"http")  _.port=80 ;;
					"https") _.port=443 ;;
					*)       _.port="$(getent services "${_.protocol}" | sed 's/[^0-9]*//;s/\/.*//')" ;;
				esac
			else
				_.host="${_.host%:*}"
			fi

			printmsg "protocol=${_.protocol} port=${_.port} host=${_.host} path=${_.path}"

			# prechecks (an empty path is valid - it requests "/")
			[[ "${_.protocol}" != "" ]] || { print -u2 -f "%s: protocol not set.\n" "$0" ; return 1 ; }
			[[ "${_.port}"     != "" ]] || { print -u2 -f "%s: port not set.\n"     "$0" ; return 1 ; }
			[[ "${_.host}"     != "" ]] || { print -u2 -f "%s: host not set.\n"     "$0" ; return 1 ; }

			_.open_connection || return 1

			# send HTTP request
			request="GET /${_.path} HTTP/1.1\r\n"
			request+="Host: ${_.host}\r\n"
			request+="User-Agent: ${_.user_agent}\r\n"
			request+="Connection: close\r\n"
			_.send_request "${request}\r\n"

			# collect response and send it to stdout
			{
				_.parse_http_response httpresponse
				_.cat_http_body "${httpresponse.transfer_encoding}"
			} <&${_.netfd.in}

			_.close_connection

			return 0
		else
			return 1
		fi
		# notreached
	}
)
270*b4dd7d09SAndy Fiddaman
# html_entity_to_ascii - filter which copies stdin to stdout and replaces
# HTML/XML character references:
#   - named entities from the table below (&lt; &amp; ...)
#   - decimal references (&#65;) and hexadecimal references (&#x41;)
#   - for backwards compatibility: bare hexadecimal numbers (&41;)
# Unknown entities are written as "ENT=|name|". A "&" which does not start
# a well-formed entity is passed through unchanged.
function html_entity_to_ascii
{
	typeset entity
	typeset c
	typeset value
	typeset digits
	typeset terminated # bool: true/false - did we see the closing ';' ?

	# Todo: Add more HTML/MathML entities here
	# Note we use a static variable (typeset -S) here to make sure we
	# don't lose the cache data between calls
	typeset -S -A entity_cache=(
		# entity to ascii (fixme: add UTF-8 transliterations)
		["nbsp"]=' '
		["lt"]='<'
		["le"]='<='
		["gt"]='>'
		["ge"]='>='
		["amp"]='&'
		["quot"]='"'
		["apos"]="'"
	)

	while IFS='' read -r -N 1 c ; do
		if [[ "$c" != "&" ]] ; then
			print -n -r -- "${c}"
			continue
		fi

		# collect the entity name up to the terminating ';'
		entity=""
		terminated=false
		while IFS='' read -r -N 1 c ; do
			case "$c" in
				";")
					terminated=true
					break
					;;
				~(Eilr)[a-z0-9#])
					entity+="$c"
					continue
					;;
				*)
#					debugmsg "error &${entity}${c}#"

					# not an entity after all (e.g. "AT&T rocks"):
					# emit the text unchanged, including the '&'
					print -n -r -- "&${entity}${c}"
					entity=""
					continue 2
					;;
			esac
		done

		if ! ${terminated} ; then
			# input ended in the middle of an entity - pass it through
			print -n -r -- "&${entity}"
			break
		fi

		if [[ "${entity}" == "" ]] ; then
			# "&;" - nothing to look up
			print -n -r -- "&;"
			continue
		fi

		value=""
		if [[ "${entity_cache["${entity}"]}" != "" ]] ; then
#			debugmsg "match #${entity}# = #${entity_cache["${entity}"]}#"
			value="${entity_cache["${entity}"]}"
		else
			if [[ "${entity}" == "#"[xX]+([0-9a-fA-F]) ]] ; then
				# hexadecimal character reference (&#x41;)
				digits="${entity:2:7}"
				value="${ printf "\u[${digits}]" ; }"
			elif [[ "${entity}" == "#"+([0-9]) ]] ; then
				# decimal character reference (&#65;); "10#" makes sure
				# that leading zeros ("&#039;") are not read as octal
				digits="${entity:1:8}"
				value="${ printf "\u[${ printf "%x" "$(( 10#${digits} ))" ; }]" ; }"
			elif [[ "${entity}" == +([0-9a-fA-F]) ]] ; then
				# bare hexadecimal literal (kept for compatibility)
				value="${ printf "\u[${entity:0:7}]" ; }"
			else
				# unknown literal - pass-through
				value="ENT=|${entity}|"
			fi

			entity_cache["${entity}"]="${value}"

#			debugmsg "lookup #${entity}# = #${entity_cache["${entity}"]}#"
		fi

		printf "%s" "${value}"
	done

	return 0
}
346*b4dd7d09SAndy Fiddaman
# handle_html - xml_tok callback which renders (x)html as plain text.
#
# This is a dumb xhtml handler: CSS, tables, images, iframes and nested
# structures are not supported and the input is assumed to be correct
# xhtml. The code was written by trial and error and should be rewritten
# to parse xhtml properly.
#
# Arguments:
#   $1 - name of the callback array (also used to keep the "html_pre" state)
#   $2 - event type (tag_begin, tag_end, tag_text, document_start, document_end)
#   $3 - tag name or text, depending on the event type
function handle_html
{
	# global variables can't be used for state when several tokenizers
	# share this function - the callback array passed by name is used
	# for variable storage instead
	typeset -n cb_state=${1}
	typeset event="$2"
	typeset payload="$3"

	if [[ "${event}" == "tag_begin" ]] ; then
		case "${payload}" in
			br | p) printf "\n" ;;
			hr)     printf "\n-------------------------------------\n" ;;
			pre)    cb_state["html_pre"]='true' ;;
		esac
	elif [[ "${event}" == "tag_end" ]] ; then
		if [[ "${payload}" == "pre" ]] ; then
			cb_state["html_pre"]='false'
		fi
	elif [[ "${event}" == "tag_text" ]] ; then
		# "html_pre" holds the command name "true" or "false"
		if ${cb_state["html_pre"]} ; then
			# preformatted text is copied verbatim
			printf "%s" "${payload}"
		else
			# compress spaces/newlines/tabs/etc.
			printf "%s" "${payload//+([\n\r\t\v[:space:][:blank:]])/ }"
		fi
	elif [[ "${event}" == "document_start" ]] ; then
		cb_state["html_pre"]='false'
	fi
	# "document_end" needs no action

	return 0
}
393*b4dd7d09SAndy Fiddaman
# handle_rss - xml_tok callback which collects the fields of each RSS
# <item> in the global associative array "item" and prints the item as
# plain text when its end tag is seen.
#
# Arguments:
#   $1 - name of the callback array (its "textbuf" element buffers text)
#   $2 - event type (tag_begin, tag_end, tag_text, document_start, document_end)
#   $3 - tag name or text, depending on the event type
function handle_rss
{
	# we can't use global variables here when multiple callbacks use the same
	# callback function - but we can use the callback associative array for
	# variable storage instead
	typeset -n callbacks=${1}
	typeset tag_type="$2"
	typeset tag_value="$3"

	case "${tag_type}" in
		tag_begin)
			case "${tag_value}" in
				item)
					# reset ALL fields which are printed below so that
					# an item without author/date does not inherit the
					# values of the previous item
					item["title"]=""
					item["link"]=""
					item["tag"]=""
					item["description"]=""
					item["author"]=""
					item["pubDate"]=""
					;;
			esac
			callbacks["textbuf"]=""
			;;
		tag_end)
			case "${tag_value}" in
				item)
					# note that each RSS item needs to be converted separately from RSS to HTML to plain text
					# to make sure that the state of one RSS item doesn't affect others
					(
						printf $"<br />#### RSS item: title: %s ####" "${item["title"]}"
						printf $"<br />## author: %s" "${item["author"]}"
						printf $"<br />## link:   %s" "${item["link"]}"
						printf $"<br />## date:   %s" "${item["pubDate"]}"
						printf $"<br />## begin description:"
						printf $"<br />%s<br />" "${item["description"]}"
						printf $"<br />## end description<br />"
						print # extra newline to make sure the sed pipeline gets flushed
					) |
						html_entity_to_ascii |	# convert XML entities (e.g. decode RSS content to HTML code)
						xml_tok "xhtmltok_cb" |	# convert HTML to plain text
						html_entity_to_ascii	# convert HTML entities
					;;
				title)                item["title"]="${callbacks["textbuf"]}" ;;
				link)                 item["link"]="${callbacks["textbuf"]}" ;;
				dc:creator | author)  item["author"]="${callbacks["textbuf"]}" ;;
				dc:date | pubDate)    item["pubDate"]="${callbacks["textbuf"]}" ;;
				description)          item["description"]="${callbacks["textbuf"]}" ;;
			esac
			# the buffered text belongs to the tag which was just closed
			callbacks["textbuf"]=""
			;;
		tag_text)
			callbacks["textbuf"]+="${tag_value}"
			;;
		document_start) ;;
		document_end) ;;
	esac
	return 0
}
450*b4dd7d09SAndy Fiddaman
# xml_tok - minimal XML/XHTML tokenizer.
#
# Reads a document from stdin and calls the functions stored in the
# associative array whose name is passed as $1:
#   callbacks["document_start"]  <arrayname> document_start
#   callbacks["tag_begin"]       <arrayname> tag_begin   <name> <attributes>
#   callbacks["tag_end"]         <arrayname> tag_end     <name>
#   callbacks["tag_text"]        <arrayname> tag_text    <text>
#   callbacks["tag_comment"]     <arrayname> tag_comment <comment text>
#   callbacks["document_end"]    <arrayname> document_end exit_success
# Missing (empty) entries are skipped. Text is only flushed when the next
# tag starts, i.e. text after the last tag is not reported.
function xml_tok
{
    typeset buf=""
    typeset namebuf=""
    typeset attrbuf=""
    typeset c=""
    typeset isendtag # bool: true/false
    typeset issingletag # bool: true/false (used for tags like "<br />")
    typeset -n callbacks=${1}

    [[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"

    while IFS='' read -r -N 1 c ; do
        isendtag=false

        # plain text is collected until the next tag starts
        if [[ "$c" != "<" ]] ; then
            buf+="$c"
            continue
        fi

        # flush any text content
        if [[ "$buf" != "" ]] ; then
            [[ ! -z "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
            buf=""
        fi

        # a '/' directly after '<' marks an end tag
        IFS='' read -r -N 1 c
        if [[ "$c" == "/" ]] ; then
            isendtag=true
        else
            buf="$c"
        fi
        # read the remainder of the tag (the '>' itself is consumed)
        IFS='' read -r -d '>' c
        buf+="$c"

        # handle comments
        if [[ "$buf" == "!--"* ]] ; then
            # did we read the comment completely ?
            if [[ "$buf" != "!--"*"--" ]] ; then
                # no - the '>' we stopped at is part of the comment text
                buf+=">"
                # read up to AND including the terminating "-->" so that
                # the closing '>' does not leak into the following text
                while [[ "$buf" != "!--"*"-->" ]] ; do
                    IFS='' read -r -N 1 c || break
                    buf+="$c"
                done
                if [[ "$buf" == "!--"*"-->" ]] ; then
                    buf="${buf%?}" # drop the closing '>'
                fi
            fi

            # strip the leading "!--" and the trailing "--"
            [[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
            buf=""
            continue
        fi

        # check if the tag starts and ends at the same time (like "<br />")
        if [[ "${buf}" == */ ]] ; then
            issingletag=true
            buf="${buf%/}"
        else
            issingletag=false
        fi

        # check if the tag has attributes (e.g. space after name)
        if [[ "$buf" == *[[:space:]]* ]] ; then
            namebuf="${buf%%[[:space:]]*}" # everything in front of the first whitespace
            attrbuf="${buf#*[[:space:]]}"  # everything after the first whitespace
        else
            namebuf="$buf"
            attrbuf=""
        fi

        if ${isendtag} ; then
            [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
        else
            [[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"

            # handle tags like <br/> (which are start- and end-tag in one piece)
            if ${issingletag} ; then
                [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
            fi
        fi
        buf=""
    done

    [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"

    print # final newline to make filters like "sed" happy
}
535*b4dd7d09SAndy Fiddaman
# get_lc_messages - print the value of LC_MESSAGES needed for subprocesses
# which want to run in a different locale/encoding.
# The first non-empty value of $LC_ALL, $LC_MESSAGES and $LANG is used;
# "C" is printed if none of them is set. Always returns 0.
function get_lc_messages
{
	if [[ "${LC_ALL}" != "" ]] ; then
		print "${LC_ALL}"
	elif [[ "${LC_MESSAGES}" != "" ]] ; then
		print "${LC_MESSAGES}"
	elif [[ "${LANG}" != "" ]] ; then
		print "${LANG}"
	else
		print "C"
	fi

	return 0
}
545*b4dd7d09SAndy Fiddaman
# do_rssread - fetch the RSS feed at URL $1 and print it as plain text.
# The document is stored in the (global) variable "data" and then passed
# to xml_tok with the "rsstok_cb" callback table.
function do_rssread
{
	typeset utf8_locale="en_US.UTF-8"

	# RSS is encoded in UTF-8, therefore switch to a unicode locale.
	# $LC_MESSAGES is pinned to the locale of the parent process first
	# so that all error messages keep using the callers locale/encoding.
	export LC_MESSAGES="${ get_lc_messages ; }"
	export LC_MONETARY="${utf8_locale}"
	export LC_NUMERIC="${utf8_locale}"
	export LC_COLLATE="${utf8_locale}"
	export LC_CTYPE="${utf8_locale}"
	export LC_TIME="${utf8_locale}"
	export LANG="${utf8_locale}"

	# return non-zero exit code for this function if the rss processing below fails
	set -o errexit

	urlconnection_t hc
	hc.user_agent="rssread/ksh93(ssl) (2010-03-27; $(uname -s -r -p))"
	hc.init_url "$1"

	# the extra "print" after cat_url terminates the last line with $'\n'
	# to make "xml_tok" happy
	data="${ hc.cat_url ; print ; }"

	print -u2 -f "# Got %d lines of RSS data, processing...\n" "${ wc -l <<< "${data}" ; }"

	xml_tok "rsstok_cb" <<< "${data}"

	return 0
}
578*b4dd7d09SAndy Fiddaman
# usage - let getopts handle the '-?' option for the option description
# in ${rssread_usage} and terminate the script with exit status 2.
# NOTE(review): presumably this makes the ksh93 getopts builtin print the
# generated usage message - confirm against the getopts documentation.
function usage
{
	# OPTIND is reset before getopts is called with the '-?' argument
	OPTIND=0
	getopts -a "${progname}" "${rssread_usage}" OPT '-?'
	exit 2
}
585*b4dd7d09SAndy Fiddaman
# make sure we use the ksh93 builtin versions
builtin basename
builtin cat
builtin mkfifo

# callbacks for xml_tok used to tokenize the RSS feed; "textbuf" is
# scratch storage for handle_rss
typeset -A rsstok_cb=(
	["tag_begin"]="handle_rss"
	["tag_end"]="handle_rss"
	["tag_text"]="handle_rss"
	["textbuf"]=""
)

# callbacks for xml_tok used to convert (x)html to plain text; "html_pre"
# is the <pre> state kept by handle_html
typeset -A xhtmltok_cb=(
	["tag_begin"]="handle_html"
	["tag_end"]="handle_html"
	["tag_text"]="handle_html"
	["textbuf"]=""
	["html_pre"]='false'
)

# fields of the RSS item which is currently being parsed (see handle_rss)
typeset -A item
605*b4dd7d09SAndy Fiddaman
# Bookmarks: short names that may be given on the command line in place
# of a full feed URL.
typeset -A bookmark_urls

# "random" urls for testing
bookmark_urls=(
	[google_blogs_ksh]='http://blogsearch.google.com/blogsearch_feeds?hl=en&scoring=d&q=(%22ksh93%22%7C%22ksh+93%22+%7C+%22korn93%22+%7C+%22korn+93%22)&ie=utf-8&num=100&output=rss'

	# some Sun staff/sites
	[blogs_sun_com]='http://blogs.sun.com/main/feed/entries/rss'
	[bigadmin]='http://www.sun.com/bigadmin/content/rss/motd.xml'
	[bigadmin_scripts]='https://www.sun.com/bigadmin/content/rss/scripts.xml'
	[jmcp]='http://www.jmcp.homeunix.com/roller/jmcp/feed/entries/rss'
	[katakai]='http://blogs.sun.com/katakai/feed/entries/rss'
	[alanc]='http://blogs.sun.com/alanc/feed/entries/rss'
	[planetsun]='http://www.planetsun.org/rss20.xml'
	[planetsolaris]='http://www.planetsolaris.org/rss20.xml'
	[planetopensolaris]='http://planet.opensolaris.org/rss20.xml'
	[theregister_uk]='http://www.theregister.co.uk/headlines.rss'
	[heise]='http://www.heise.de/newsticker/heise.rdf'
	[slashdot]='http://rss.slashdot.org/Slashdot/slashdot'
	[wikipedia_command_shells]='http://en.wikipedia.org/w/index.php?title=Comparison_of_command_shells&feed=rss&action=history'
)
626*b4dd7d09SAndy Fiddaman
# Program name (last path component of $0); passed to getopts via "-a"
typeset progname
progname="${ basename "${0}" ; }"

# Option/usage description in ksh93 getopts self-documenting format.
# Read-only; consumed by the getopts calls in this script.
typeset -r rssread_usage=$'+
[-?\n@(#)\$Id: rssread (Roland Mainz) 2010-03-27 \$\n]
[-author?Roland Mainz <roland.mainz@sun.com>]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?rssread - fetch RSS messages and convert them to plain text]
[+DESCRIPTION?\brssread\b RSS to plain text converter
        which fetches RSS streams via HTTP and converts them from
	RSS to HTML to plain text in the current locale/encoding.]
[I:noiconv?Do not convert data from UTF-8 to current locale/encoding.]

[ url ]

[+SEE ALSO?\bksh93\b(1), \bshnote\b(1)]
'
643*b4dd7d09SAndy Fiddaman
# When "true" the output of do_rssread is written as-is instead of
# being piped through iconv.
typeset noiconv=false

# Option parsing: "-I" turns the conversion off, "+I" turns it back on,
# anything else getopts hands us ends in usage().
while getopts -a "${progname}" "${rssread_usage}" OPT ; do
#	printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
	case ${OPT} in
		I)    noiconv=true  ;;
		+I)   noiconv=false ;;
		*)    usage ;;
	esac
done
shift $((OPTIND-1))

# "${1-}" instead of "$1": a missing operand must reach the check below
# even if the shell treats unset parameters as an error.
typeset url="${1-}"

if [[ "${url}" == "" ]] ; then
	fatal_error $"No url given."
fi

# A bookmark name given in place of a URL is replaced by the stored URL.
if [[ "${bookmark_urls[${url}]}" != "" ]] ; then
	printmsg $"Using bookmark ${url} = ${bookmark_urls[${url}]}"
	url="${bookmark_urls[${url}]}"
fi

# Exit status of the fetch/convert step. The script used to finish with
# an unconditional "exit 0", which reported success even when
# do_rssread or iconv had failed.
typeset -i exit_status=0

if ${noiconv} ; then
	do_rssread "${url}" || exit_status=$?
else
	# without pipefail only the status of iconv would be visible and
	# a failure of do_rssread inside the pipeline would be lost
	set -o pipefail
	do_rssread "${url}" | iconv -f "UTF-8" - - || exit_status=$?
fi

exit ${exit_status}
#EOF.
675