xref: /freebsd/contrib/libc-vis/unvis.3 (revision 6829dae12bb055451fa467da4589c43bd03b1e64)
1.\"	$NetBSD: unvis.3,v 1.29 2017/10/24 19:14:55 abhinav Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)unvis.3	8.2 (Berkeley) 12/11/93
32.\"
33.Dd March 12, 2011
34.Dt UNVIS 3
35.Os
36.Sh NAME
37.Nm unvis ,
38.Nm strunvis ,
39.Nm strnunvis ,
40.Nm strunvisx ,
41.Nm strnunvisx
42.Nd decode a visual representation of characters
43.Sh LIBRARY
44.Lb libc
45.Sh SYNOPSIS
46.In vis.h
47.Ft int
48.Fn unvis "char *cp" "int c" "int *astate" "int flag"
49.Ft int
50.Fn strunvis "char *dst" "const char *src"
51.Ft int
52.Fn strnunvis "char *dst" "size_t dlen" "const char *src"
53.Ft int
54.Fn strunvisx "char *dst" "const char *src" "int flag"
55.Ft int
56.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag"
57.Sh DESCRIPTION
58The
59.Fn unvis ,
60.Fn strunvis
61and
62.Fn strunvisx
63functions
64are used to decode a visual representation of characters, as produced
65by the
66.Xr vis 3
67function, back into
68the original form.
69.Pp
70The
71.Fn unvis
72function is called with successive characters in
73.Ar c
74until a valid sequence is recognized, at which time the decoded
75character is available at the character pointed to by
76.Ar cp .
77.Pp
78The
79.Fn strunvis
80function decodes the characters pointed to by
81.Ar src
82into the buffer pointed to by
83.Ar dst .
84The
85.Fn strunvis
86function simply copies
87.Ar src
88to
89.Ar dst ,
90decoding any escape sequences along the way,
91and returns the number of characters placed into
92.Ar dst ,
93or \-1 if an
94invalid escape sequence was detected.
95The size of
96.Ar dst
97should be equal to the size of
98.Ar src
99(that is, no expansion takes place during decoding).
100.Pp
101The
102.Fn strunvisx
103function does the same as the
104.Fn strunvis
105function,
106but it allows you to add a flag that specifies the style the string
107.Ar src
108is encoded with.
109Currently, the supported flags are:
110.Dv VIS_HTTPSTYLE
111and
112.Dv VIS_MIMESTYLE .
113.Pp
114The
115.Fn unvis
116function implements a state machine that can be used to decode an
117arbitrary stream of bytes.
118All state associated with the bytes being decoded is stored outside the
119.Fn unvis
120function (that is, a pointer to the state is passed in), so
121calls decoding different streams can be freely intermixed.
122To start decoding a stream of bytes, first initialize an integer to zero.
123Call
124.Fn unvis
125with each successive byte, along with a pointer
126to this integer, and a pointer to a destination character.
127The
128.Fn unvis
129function has several return codes that must be handled properly.
130They are:
131.Bl -tag -width UNVIS_VALIDPUSH
132.It Li \&0 No (zero)
133Another character is necessary; nothing has been recognized yet.
134.It Dv UNVIS_VALID
135A valid character has been recognized and is available at the location
136pointed to by
137.Fa cp .
138.It Dv UNVIS_VALIDPUSH
139A valid character has been recognized and is available at the location
140pointed to by
141.Fa cp ;
142however, the character currently passed in should be passed in again.
143.It Dv UNVIS_NOCHAR
144A valid sequence was detected, but no character was produced.
145This return code is necessary to indicate a logical break between characters.
146.It Dv UNVIS_SYNBAD
147An invalid escape sequence was detected, or the decoder is in an unknown state.
148The decoder is placed into the starting state.
149.El
150.Pp
151When all bytes in the stream have been processed, call
152.Fn unvis
153one more time with flag set to
154.Dv UNVIS_END
155to extract any remaining character (the character passed in is ignored).
156.Pp
157The
158.Fa flag
159argument is also used to specify the encoding style of the source.
160If set to
161.Dv VIS_HTTPSTYLE
162or
163.Dv VIS_HTTP1808 ,
164.Fn unvis
165will decode URI strings as specified in RFC 1808.
166If set to
167.Dv VIS_HTTP1866 ,
168.Fn unvis
169will decode entity references and numeric character references
170as specified in RFC 1866.
171If set to
172.Dv VIS_MIMESTYLE ,
173.Fn unvis
174will decode MIME Quoted-Printable strings as specified in RFC 2045.
175If set to
176.Dv VIS_NOESCAPE ,
177.Fn unvis
178will not decode
179.Ql \e
180quoted characters.
181.Pp
182The following code fragment illustrates a proper use of
183.Fn unvis .
184.Bd -literal -offset indent
185int state = 0;
186char out;
187
188while ((ch = getchar()) != EOF) {
189again:
190	switch(unvis(&out, ch, &state, 0)) {
191	case 0:
192	case UNVIS_NOCHAR:
193		break;
194	case UNVIS_VALID:
195		(void)putchar(out);
196		break;
197	case UNVIS_VALIDPUSH:
198		(void)putchar(out);
199		goto again;
200	case UNVIS_SYNBAD:
201		errx(EXIT_FAILURE, "Bad character sequence!");
202	}
203}
204if (unvis(&out, '\e0', &state, UNVIS_END) == UNVIS_VALID)
205	(void)putchar(out);
206.Ed
207.Sh ERRORS
208The functions
209.Fn strunvis ,
210.Fn strnunvis ,
211.Fn strunvisx ,
212and
213.Fn strnunvisx
214will return \-1 on error and set
215.Va errno
216to:
217.Bl -tag -width Er
218.It Bq Er EINVAL
219An invalid escape sequence was detected, or the decoder is in an unknown state.
220.El
221.Pp
222In addition, the functions
223.Fn strnunvis
224and
225.Fn strnunvisx
226can also set
227.Va errno
228on error to:
229.Bl -tag -width Er
230.It Bq Er ENOSPC
231Not enough space to perform the conversion.
232.El
233.Sh SEE ALSO
234.Xr unvis 1 ,
235.Xr vis 1 ,
236.Xr vis 3
237.Rs
238.%A R. Fielding
239.%T Relative Uniform Resource Locators
240.%O RFC1808
241.Re
242.Sh HISTORY
243The
244.Fn unvis
245function
246first appeared in
247.Bx 4.4 .
248The
249.Fn strnunvis
250and
251.Fn strnunvisx
252functions appeared in
253.Nx 6.0
254and
255.Fx 9.2 .
256.Sh BUGS
257The names
258.Dv VIS_HTTP1808
259and
260.Dv VIS_HTTP1866
261are wrong.
262Percent-encoding was defined in RFC 1738, the original RFC for URLs.
263RFC 1866 defines HTML 2.0, an application of SGML, from which it
264inherits concepts of numeric character references and entity
265references.
266