xref: /freebsd/contrib/libc-vis/unvis.3 (revision fba3cde907930eed2adb8a320524bc250338c729)
1.\"	$NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)unvis.3	8.2 (Berkeley) 12/11/93
32.\"
33.Dd March 12, 2011
34.Dt UNVIS 3
35.Os
36.Sh NAME
37.Nm unvis ,
38.Nm strunvis
39.Nd decode a visual representation of characters
40.Sh LIBRARY
41.Lb libc
42.Sh SYNOPSIS
43.In vis.h
44.Ft int
45.Fn unvis "char *cp" "int c" "int *astate" "int flag"
46.Ft int
47.Fn strunvis "char *dst" "const char *src"
48.Ft int
49.Fn strnunvis "char *dst" "size_t dlen" "const char *src"
50.Ft int
51.Fn strunvisx "char *dst" "const char *src" "int flag"
52.Ft int
53.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag"
54.Sh DESCRIPTION
55The
56.Fn unvis ,
57.Fn strunvis
58and
59.Fn strunvisx
60functions
61are used to decode a visual representation of characters, as produced
62by the
63.Xr vis 3
64function, back into
65the original form.
66.Pp
67The
68.Fn unvis
69function is called with successive characters in
70.Fa c
71until a valid sequence is recognized, at which time the decoded
72character is available at the character pointed to by
73.Fa cp .
74.Pp
75The
76.Fn strunvis
77function decodes the characters pointed to by
78.Fa src
79into the buffer pointed to by
80.Fa dst .
81The
82.Fn strunvis
83function simply copies
84.Fa src
85to
86.Fa dst ,
87decoding any escape sequences along the way,
88and returns the number of characters placed into
89.Fa dst ,
90or \-1 if an
91invalid escape sequence was detected.
92The size of
93.Fa dst
94should be equal to the size of
95.Fa src
96(that is, no expansion takes place during decoding).
97.Pp
98The
99.Fn strunvisx
100function does the same as the
101.Fn strunvis
102function,
103but it allows you to add a flag that specifies the style the string
104.Fa src
105is encoded with.
106Currently, the supported flags are:
107.Dv VIS_HTTPSTYLE
108and
109.Dv VIS_MIMESTYLE .
110.Pp
111The
112.Fn unvis
113function implements a state machine that can be used to decode an
114arbitrary stream of bytes.
115All state associated with the bytes being decoded is stored outside the
116.Fn unvis
117function (that is, a pointer to the state is passed in), so
118calls decoding different streams can be freely intermixed.
119To start decoding a stream of bytes, first initialize an integer to zero.
120Call
121.Fn unvis
122with each successive byte, along with a pointer
123to this integer, and a pointer to a destination character.
124The
125.Fn unvis
126function has several return codes that must be handled properly.
127They are:
128.Bl -tag -width UNVIS_VALIDPUSH
129.It Li \&0 No (zero)
130Another character is necessary; nothing has been recognized yet.
131.It Dv UNVIS_VALID
132A valid character has been recognized and is available at the location
133pointed to by
134.Fa cp .
135.It Dv UNVIS_VALIDPUSH
136A valid character has been recognized and is available at the location
137pointed to by
138.Fa cp ;
139however, the character currently passed in should be passed in again.
140.It Dv UNVIS_NOCHAR
141A valid sequence was detected, but no character was produced.
142This return code is necessary to indicate a logical break between characters.
143.It Dv UNVIS_SYNBAD
144An invalid escape sequence was detected, or the decoder is in an unknown state.
145The decoder is placed into the starting state.
146.El
147.Pp
148When all bytes in the stream have been processed, call
149.Fn unvis
150one more time with flag set to
151.Dv UNVIS_END
152to extract any remaining character (the character passed in is ignored).
153.Pp
154The
155.Fa flag
156argument is also used to specify the encoding style of the source.
157If set to
158.Dv VIS_HTTPSTYLE
159or
160.Dv VIS_HTTP1808 ,
161.Fn unvis
162will decode URI strings as specified in RFC 1808.
163If set to
164.Dv VIS_HTTP1866 ,
165.Fn unvis
166will decode entity references and numeric character references
167as specified in RFC 1866.
168If set to
169.Dv VIS_MIMESTYLE ,
170.Fn unvis
171will decode MIME Quoted-Printable strings as specified in RFC 2045.
172If set to
173.Dv VIS_NOESCAPE ,
174.Fn unvis
175will not decode
176.Ql \e
177quoted characters.
178.Pp
179The following code fragment illustrates a proper use of
180.Fn unvis .
181.Bd -literal -offset indent
182int ch, state = 0;
183char out;
184
185while ((ch = getchar()) != EOF) {
186again:
187	switch(unvis(\*[Am]out, ch, \*[Am]state, 0)) {
188	case 0:
189	case UNVIS_NOCHAR:
190		break;
191	case UNVIS_VALID:
192		(void)putchar(out);
193		break;
194	case UNVIS_VALIDPUSH:
195		(void)putchar(out);
196		goto again;
197	case UNVIS_SYNBAD:
198		errx(EXIT_FAILURE, "Bad character sequence!");
199	}
200}
201if (unvis(\*[Am]out, '\e0', \*[Am]state, UNVIS_END) == UNVIS_VALID)
202	(void)putchar(out);
203.Ed
204.Sh ERRORS
205The functions
206.Fn strunvis ,
207.Fn strnunvis ,
208.Fn strunvisx ,
209and
210.Fn strnunvisx
211will return \-1 on error and set
212.Va errno
213to:
214.Bl -tag -width Er
215.It Bq Er EINVAL
216An invalid escape sequence was detected, or the decoder is in an unknown state.
217.El
218.Pp
219In addition, the functions
220.Fn strnunvis
221and
222.Fn strnunvisx
223can also set
224.Va errno
225on error to:
226.Bl -tag -width Er
227.It Bq Er ENOSPC
228Not enough space to perform the conversion.
229.El
230.Sh SEE ALSO
231.Xr unvis 1 ,
232.Xr vis 1 ,
233.Xr vis 3
234.Rs
235.%A R. Fielding
236.%T Relative Uniform Resource Locators
237.%O RFC1808
238.Re
239.Sh HISTORY
240The
241.Fn unvis
242function
243first appeared in
244.Bx 4.4 .
245The
246.Fn strnunvis
247and
248.Fn strnunvisx
249functions appeared in
250.Nx 6.0
251and
252.Fx 9.2 .
253.Sh BUGS
254The names
255.Dv VIS_HTTP1808
256and
257.Dv VIS_HTTP1866
258are wrong.
259Percent-encoding was defined in RFC 1738, the original RFC for URLs.
260RFC 1866 defines HTML 2.0, an application of SGML, from which it
261inherits concepts of numeric character references and entity
262references.
263