xref: /freebsd/contrib/libc-vis/unvis.3 (revision c6ec7d31830ab1c80edae95ad5e4b9dba10c47ac)
1.\"	$NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)unvis.3	8.2 (Berkeley) 12/11/93
32.\"
33.Dd March 12, 2011
34.Dt UNVIS 3
35.Os
36.Sh NAME
37.Nm unvis ,
38.Nm strunvis
39.Nd decode a visual representation of characters
40.Sh LIBRARY
41.Lb libc
42.Sh SYNOPSIS
43.In vis.h
44.Ft int
45.Fn unvis "char *cp" "int c" "int *astate" "int flag"
46.Ft int
47.Fn strunvis "char *dst" "const char *src"
48.Ft int
49.Fn strnunvis "char *dst" "size_t dlen" "const char *src"
50.Ft int
51.Fn strunvisx "char *dst" "const char *src" "int flag"
52.Ft int
53.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag"
54.Sh DESCRIPTION
55The
56.Fn unvis ,
57.Fn strunvis
58and
59.Fn strunvisx
60functions
61are used to decode a visual representation of characters, as produced
62by the
63.Xr vis 3
64function, back into
65the original form.
66.Pp
67The
68.Fn unvis
69function is called with successive characters in
70.Ar c
71until a valid sequence is recognized, at which time the decoded
72character is available at the character pointed to by
73.Ar cp .
74.Pp
75The
76.Fn strunvis
77function decodes the characters pointed to by
78.Ar src
79into the buffer pointed to by
80.Ar dst .
81The
82.Fn strunvis
83function simply copies
84.Ar src
85to
86.Ar dst ,
87decoding any escape sequences along the way,
88and returns the number of characters placed into
89.Ar dst ,
90or \-1 if an
91invalid escape sequence was detected.
92The size of
93.Ar dst
94should be equal to the size of
95.Ar src
96(that is, no expansion takes place during decoding).
97.Pp
98The
99.Fn strunvisx
100function does the same as the
101.Fn strunvis
102function,
103but it allows you to add a flag that specifies the style the string
104.Ar src
105is encoded with.
106Currently, the supported flags are:
107.Dv VIS_HTTPSTYLE
108and
109.Dv VIS_MIMESTYLE .
110.Pp
111The
112.Fn unvis
113function implements a state machine that can be used to decode an
114arbitrary stream of bytes.
115All state associated with the bytes being decoded is stored outside the
116.Fn unvis
117function (that is, a pointer to the state is passed in), so
118calls decoding different streams can be freely intermixed.
119To start decoding a stream of bytes, first initialize an integer to zero.
120Call
121.Fn unvis
122with each successive byte, along with a pointer
123to this integer, and a pointer to a destination character.
124The
125.Fn unvis
126function has several return codes that must be handled properly.
127They are:
128.Bl -tag -width UNVIS_VALIDPUSH
129.It Li \&0 (zero)
130Another character is necessary; nothing has been recognized yet.
131.It Dv UNVIS_VALID
132A valid character has been recognized and is available at the location
133pointed to by cp.
134.It Dv UNVIS_VALIDPUSH
135A valid character has been recognized and is available at the location
136pointed to by cp; however, the character currently passed in should
137be passed in again.
138.It Dv UNVIS_NOCHAR
139A valid sequence was detected, but no character was produced.
140This return code is necessary to indicate a logical break between characters.
141.It Dv UNVIS_SYNBAD
142An invalid escape sequence was detected, or the decoder is in an unknown state.
143The decoder is placed into the starting state.
144.El
145.Pp
146When all bytes in the stream have been processed, call
147.Fn unvis
148one more time with flag set to
149.Dv UNVIS_END
150to extract any remaining character (the character passed in is ignored).
151.Pp
152The
153.Ar flag
154argument is also used to specify the encoding style of the source.
155If set to
156.Dv VIS_HTTPSTYLE
157or
158.Dv VIS_HTTP1808 ,
159.Fn unvis
160will decode URI strings as specified in RFC 1808.
161If set to
162.Dv VIS_HTTP1866 ,
163.Fn unvis
164will decode URI strings as specified in RFC 1866.
165If set to
166.Dv VIS_MIMESTYLE ,
167.Fn unvis
168will decode MIME Quoted-Printable strings as specified in RFC 2045.
169If set to
170.Dv VIS_NOESCAPE ,
171.Fn unvis
172will not decode \e quoted characters.
173.Pp
174The following code fragment illustrates a proper use of
175.Fn unvis .
176.Bd -literal -offset indent
177int state = 0;
178char out;
179
180while ((ch = getchar()) != EOF) {
181again:
182	switch(unvis(\*[Am]out, ch, \*[Am]state, 0)) {
183	case 0:
184	case UNVIS_NOCHAR:
185		break;
186	case UNVIS_VALID:
187		(void)putchar(out);
188		break;
189	case UNVIS_VALIDPUSH:
190		(void)putchar(out);
191		goto again;
192	case UNVIS_SYNBAD:
193		errx(EXIT_FAILURE, "Bad character sequence!");
194	}
195}
196if (unvis(\*[Am]out, '\e0', \*[Am]state, UNVIS_END) == UNVIS_VALID)
197	(void)putchar(out);
198.Ed
199.Sh ERRORS
200The functions
201.Fn strunvis ,
202.Fn strnunvis ,
203.Fn strunvisx ,
204and
205.Fn strnunvisx
206will return \-1 on error and set
207.Va errno
208to:
209.Bl -tag -width Er
210.It Bq Er EINVAL
211An invalid escape sequence was detected, or the decoder is in an unknown state.
212.El
213.Pp
214In addition, the functions
215.Fn strnunvis
216and
217.Fn strnunvisx
218can also set
219.Va errno
220on error to:
221.Bl -tag -width Er
222.It Bq Er ENOSPC
223Not enough space to perform the conversion.
224.El
225.Sh SEE ALSO
226.Xr unvis 1 ,
227.Xr vis 1 ,
228.Xr vis 3
229.Rs
230.%A R. Fielding
231.%T Relative Uniform Resource Locators
232.%O RFC1808
233.Re
234.Sh HISTORY
235The
236.Fn unvis
237function
238first appeared in
239.Bx 4.4 .
240The
241.Fn strnunvis
242and
243.Fn strnunvisx
244functions appeared in
245.Nx 6.0
246and
247.Fx 10.0 .
248