xref: /freebsd/contrib/libc-vis/vis.3 (revision 2710751bc309af25c6dea1171781678258e83840)
1.\"	$NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)vis.3	8.1 (Berkeley) 6/9/93
32.\"
33.Dd February 19, 2013
34.Dt VIS 3
35.Os
36.Sh NAME
37.Nm vis ,
38.Nm nvis ,
39.Nm strvis ,
40.Nm strnvis ,
41.Nm strvisx ,
42.Nm strnvisx ,
43.Nm strenvisx ,
44.Nm svis ,
45.Nm snvis ,
46.Nm strsvis ,
47.Nm strsnvis ,
48.Nm strsvisx ,
49.Nm strsnvisx ,
50.Nm strsenvisx
51.Nd visually encode characters
52.Sh LIBRARY
53.Lb libc
54.Sh SYNOPSIS
55.In vis.h
56.Ft char *
57.Fn vis "char *dst" "int c" "int flag" "int nextc"
58.Ft char *
59.Fn nvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc"
60.Ft int
61.Fn strvis "char *dst" "const char *src" "int flag"
62.Ft int
63.Fn strnvis "char *dst" "size_t dlen" "const char *src" "int flag"
64.Ft int
65.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
66.Ft int
67.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
68.Ft int
69.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
70.Ft char *
71.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
72.Ft char *
73.Fn snvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" "const char *extra"
74.Ft int
75.Fn strsvis "char *dst" "const char *src" "int flag" "const char *extra"
76.Ft int
77.Fn strsnvis "char *dst" "size_t dlen" "const char *src" "int flag" "const char *extra"
78.Ft int
79.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
80.Ft int
81.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
82.Ft int
83.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
84.Sh DESCRIPTION
85The
86.Fn vis
87function
88copies into
89.Fa dst
90a string which represents the character
91.Fa c .
92If
93.Fa c
94needs no encoding, it is copied in unaltered.
95The string is null terminated, and a pointer to the end of the string is
96returned.
97The maximum length of any encoding is four
98bytes (not including the trailing
99.Dv NUL ) ;
100thus, when
101encoding a set of characters into a buffer, the size of the buffer should
102be four times the number of bytes encoded, plus one for the trailing
103.Dv NUL .
104The flag parameter is used for altering the default range of
105characters considered for encoding and for altering the visual
106representation.
107The additional character,
108.Fa nextc ,
109is only used when selecting the
110.Dv VIS_CSTYLE
111encoding format (explained below).
112.Pp
113The
114.Fn strvis ,
115.Fn strnvis ,
116.Fn strvisx ,
117and
118.Fn strnvisx
119functions copy into
120.Fa dst
121a visual representation of
122the string
123.Fa src .
124The
125.Fn strvis
126and
127.Fn strnvis
128functions encode characters from
129.Fa src
130up to the
131first
132.Dv NUL .
133The
134.Fn strvisx
135and
136.Fn strnvisx
137functions encode exactly
138.Fa len
139characters from
140.Fa src
141(this
142is useful for encoding a block of data that may contain
143.Dv NUL Ns 's ) .
144Both forms
145.Dv NUL
146terminate
147.Fa dst .
148The size of
149.Fa dst
150must be four times the number
151of bytes encoded from
152.Fa src
153(plus one for the
154.Dv NUL ) .
155Both
156forms return the number of characters in
157.Fa dst
158(not including the trailing
159.Dv NUL ) .
160The
161.Dq Nm n
162versions of the functions also take an additional argument
163.Fa dlen
164that indicates the length of the
165.Fa dst
166buffer.
167If
168.Fa dlen
169is not large enough to fit the converted string then the
170.Fn strnvis
171and
172.Fn strnvisx
173functions return \-1 and set
174.Va errno
175to
176.Dv ENOSPC .
177The
178.Fn strenvisx
179function takes an additional argument,
180.Fa cerr_ptr ,
181that is used to pass in and out a multibyte conversion error flag.
182This is useful when processing single characters at a time when
183it is possible that the locale may be set to something other
184than the locale of the characters in the input data.
185.Pp
186The functions
187.Fn svis ,
188.Fn snvis ,
189.Fn strsvis ,
190.Fn strsnvis ,
191.Fn strsvisx ,
192.Fn strsnvisx ,
193and
194.Fn strsenvisx
195correspond to
196.Fn vis ,
197.Fn nvis ,
198.Fn strvis ,
199.Fn strnvis ,
200.Fn strvisx ,
201.Fn strnvisx ,
202and
203.Fn strenvisx
204but have an additional argument
205.Fa extra ,
206pointing to a
207.Dv NUL
208terminated list of characters.
209These characters will be encoded or backslash-escaped when copied into
210.Fa dst .
211These functions are useful e.g. to remove the special meaning
212of certain characters to shells.
213.Pp
214The encoding is a unique, invertible representation composed entirely of
215graphic characters; it can be decoded back into the original form using
216the
217.Xr unvis 3 ,
218.Xr strunvis 3
219or
220.Xr strnunvis 3
221functions.
222.Pp
223There are two parameters that can be controlled: the range of
224characters that are encoded (applies only to
225.Fn vis ,
226.Fn nvis ,
227.Fn strvis ,
228.Fn strnvis ,
229.Fn strvisx ,
230and
231.Fn strnvisx ) ,
232and the type of representation used.
233By default, all non-graphic characters,
234except space, tab, and newline, are encoded (see
235.Xr isgraph 3 ) .
236The following flags
237alter this:
238.Bl -tag -width VIS_WHITEX
239.It Dv VIS_GLOB
240Also encode the magic characters
241.Ql ( * ,
242.Ql \&? ,
243.Ql \&[
244and
245.Ql # )
246recognized by
247.Xr glob 3 .
248.It Dv VIS_SP
249Also encode space.
250.It Dv VIS_TAB
251Also encode tab.
252.It Dv VIS_NL
253Also encode newline.
254.It Dv VIS_WHITE
255Synonym for
256.Dv VIS_SP
257\&|
258.Dv VIS_TAB
259\&|
260.Dv VIS_NL .
261.It Dv VIS_SAFE
262Only encode
263.Dq unsafe
264characters.
265Unsafe means control characters which may cause common terminals to perform
266unexpected functions.
267Currently this form allows space, tab, newline, backspace, bell, and
268return \(em in addition to all graphic characters \(em unencoded.
269.El
270.Pp
271(The above flags have no effect for
272.Fn svis ,
273.Fn snvis ,
274.Fn strsvis ,
275.Fn strsnvis ,
276.Fn strsvisx ,
277and
278.Fn strsnvisx .
279When using these functions, place all graphic characters to be
280encoded in an array pointed to by
281.Fa extra .
282In general, the backslash character should be included in this array; see the
283warning on the use of the
284.Dv VIS_NOSLASH
285flag below).
286.Pp
287There are five forms of encoding.
288All forms use the backslash character
289.Ql \e
290to introduce a special
291sequence; two backslashes are used to represent a real backslash,
292except
293.Dv VIS_HTTPSTYLE
294that uses
295.Ql % ,
296or
297.Dv VIS_MIMESTYLE
298that uses
299.Ql = .
300These are the visual formats:
301.Bl -tag -width VIS_CSTYLE
302.It (default)
303Use an
304.Ql M
305to represent meta characters (characters with the 8th
306bit set), and use caret
307.Ql ^
308to represent control characters (see
309.Xr iscntrl 3 ) .
310The following formats are used:
311.Bl -tag -width xxxxx
312.It Dv \e^C
313Represents the control character
314.Ql C .
315Spans characters
316.Ql \e000
317through
318.Ql \e037 ,
319and
320.Ql \e177
321(as
322.Ql \e^? ) .
323.It Dv \eM-C
324Represents character
325.Ql C
326with the 8th bit set.
327Spans characters
328.Ql \e241
329through
330.Ql \e376 .
331.It Dv \eM^C
332Represents control character
333.Ql C
334with the 8th bit set.
335Spans characters
336.Ql \e200
337through
338.Ql \e237 ,
339and
340.Ql \e377
341(as
342.Ql \eM^? ) .
343.It Dv \e040
344Represents
345.Tn ASCII
346space.
347.It Dv \e240
348Represents Meta-space.
349.El
350.Pp
351.It Dv VIS_CSTYLE
352Use C-style backslash sequences to represent standard non-printable
353characters.
354The following sequences are used to represent the indicated characters:
355.Bd -unfilled -offset indent
356.Li \ea Tn  \(em BEL No (007)
357.Li \eb Tn  \(em BS No (010)
358.Li \ef Tn  \(em NP No (014)
359.Li \en Tn  \(em NL No (012)
360.Li \er Tn  \(em CR No (015)
361.Li \es Tn  \(em SP No (040)
362.Li \et Tn  \(em HT No (011)
363.Li \ev Tn  \(em VT No (013)
364.Li \e0 Tn  \(em NUL No (000)
365.Ed
366.Pp
367When using this format, the
368.Fa nextc
369parameter is looked at to determine if a
370.Dv NUL
371character can be encoded as
372.Ql \e0
373instead of
374.Ql \e000 .
375If
376.Fa nextc
377is an octal digit, the latter representation is used to
378avoid ambiguity.
379.It Dv VIS_OCTAL
380Use a three digit octal sequence.
381The form is
382.Ql \eddd
383where
384.Em d
385represents an octal digit.
386.It Dv VIS_HTTPSTYLE
387Use URI encoding as described in RFC 1738.
388The form is
389.Ql %xx
390where
391.Em x
392represents a lower case hexadecimal digit.
393.It Dv VIS_MIMESTYLE
394Use MIME Quoted-Printable encoding as described in RFC 2045, only don't
395break lines and don't handle CRLF.
396The form is
397.Ql =XX
398where
399.Em X
400represents an upper case hexadecimal digit.
401.El
402.Pp
403There is one additional flag,
404.Dv VIS_NOSLASH ,
405which inhibits the
406doubling of backslashes and the backslash before the default
407format (that is, control characters are represented by
408.Ql ^C
409and
410meta characters as
411.Ql M-C ) .
412With this flag set, the encoding is
413ambiguous and non-invertible.
414.Sh MULTIBYTE CHARACTER SUPPORT
415These functions support multibyte character input.
416The encoding conversion is influenced by the setting of the
417.Ev LC_CTYPE
418environment variable which defines the set of characters
419that can be copied without encoding.
420.Pp
421When 8-bit data is present in the input,
422.Ev LC_CTYPE
423must be set to the correct locale or to the C locale.
424If the locales of the data and the conversion are mismatched,
425multibyte character recognition may fail and encoding will be performed
426byte-by-byte instead.
427.Pp
428As noted above,
429.Fa dst
430must be four times the number of bytes processed from
431.Fa src .
432But note that each multibyte character can be up to
433.Dv MB_LEN_MAX
434bytes
435.\" (see
436.\" .Xr multibyte 3 )
437so in terms of multibyte characters,
438.Fa dst
439must be four times
440.Dv MB_LEN_MAX
441times the number of characters processed from
442.Fa src .
443.Sh ENVIRONMENT
444.Bl -tag -width ".Ev LC_CTYPE"
445.It Ev LC_CTYPE
446Specify the locale of the input data.
447Set to C if the input data locale is unknown.
448.El
449.Sh ERRORS
450The functions
451.Fn nvis
452and
453.Fn snvis
454will return
455.Dv NULL
456and the functions
457.Fn strnvis ,
458.Fn strnvisx ,
459.Fn strsnvis ,
460and
461.Fn strsnvisx
462will return \-1 when the
463.Fa dlen
464destination buffer size is not enough to perform the conversion while
465setting
466.Va errno
467to:
468.Bl -tag -width ".Bq Er ENOSPC"
469.It Bq Er ENOSPC
470The destination buffer size is not large enough to perform the conversion.
471.El
472.Sh SEE ALSO
473.Xr unvis 1 ,
474.Xr vis 1 ,
475.Xr glob 3 ,
476.\" .Xr multibyte 3 ,
477.Xr unvis 3
478.Rs
479.%A T. Berners-Lee
480.%T Uniform Resource Locators (URL)
481.%O "RFC 1738"
482.Re
483.Rs
484.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
485.%O "RFC 2045"
486.Re
487.Sh HISTORY
488The
489.Fn vis ,
490.Fn strvis ,
491and
492.Fn strvisx
493functions first appeared in
494.Bx 4.4 .
495The
496.Fn svis ,
497.Fn strsvis ,
498and
499.Fn strsvisx
500functions appeared in
501.Nx 1.5
502and
503.Fx 9.2 .
504The buffer size limited versions of the functions
505.Po Fn nvis ,
506.Fn strnvis ,
507.Fn strnvisx ,
508.Fn snvis ,
509.Fn strsnvis ,
510and
511.Fn strsnvisx Pc
512appeared in
513.Nx 6.0 No and
514.Fx 9.2 .
515Multibyte character support was added in
516.Nx 7.0
517and
518.Fx 9.2 .
519