xref: /freebsd/contrib/libc-vis/vis.3 (revision b9c36cc755002809a7d7c7109e3425fdfca036d2)
1.\"	$NetBSD: vis.3,v 1.45 2016/06/08 15:00:04 wiz Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)vis.3	8.1 (Berkeley) 6/9/93
32.\"
33.Dd January 14, 2015
34.Dt VIS 3
35.Os
36.Sh NAME
37.Nm vis ,
38.Nm nvis ,
39.Nm strvis ,
40.Nm stravis ,
41.Nm strnvis ,
42.Nm strvisx ,
43.Nm strnvisx ,
44.Nm strenvisx ,
45.Nm svis ,
46.Nm snvis ,
47.Nm strsvis ,
48.Nm strsnvis ,
49.Nm strsvisx ,
50.Nm strsnvisx ,
51.Nm strsenvisx
52.Nd visually encode characters
53.Sh LIBRARY
54.Lb libc
55.Sh SYNOPSIS
56.In vis.h
57.Ft char *
58.Fn vis "char *dst" "int c" "int flag" "int nextc"
59.Ft char *
60.Fn nvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc"
61.Ft int
62.Fn strvis "char *dst" "const char *src" "int flag"
63.Ft int
64.Fn stravis "char **dst" "const char *src" "int flag"
65.Ft int
66.Fn strnvis "char *dst" "size_t dlen" "const char *src" "int flag"
67.Ft int
68.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
69.Ft int
70.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
71.Ft int
72.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
73.Ft char *
74.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
75.Ft char *
76.Fn snvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" "const char *extra"
77.Ft int
78.Fn strsvis "char *dst" "const char *src" "int flag" "const char *extra"
79.Ft int
80.Fn strsnvis "char *dst" "size_t dlen" "const char *src" "int flag" "const char *extra"
81.Ft int
82.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
83.Ft int
84.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
85.Ft int
86.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
87.Sh DESCRIPTION
88The
89.Fn vis
90function
91copies into
92.Fa dst
93a string which represents the character
94.Fa c .
95If
96.Fa c
97needs no encoding, it is copied in unaltered.
98The string is null terminated, and a pointer to the end of the string is
99returned.
100The maximum length of any encoding is four
101bytes (not including the trailing
102.Dv NUL ) ;
103thus, when
104encoding a set of characters into a buffer, the size of the buffer should
105be four times the number of bytes encoded, plus one for the trailing
106.Dv NUL .
107The flag parameter is used for altering the default range of
108characters considered for encoding and for altering the visual
109representation.
110The additional character,
111.Fa nextc ,
112is only used when selecting the
113.Dv VIS_CSTYLE
114encoding format (explained below).
115.Pp
116The
117.Fn strvis ,
118.Fn stravis ,
119.Fn strnvis ,
120.Fn strvisx ,
121and
122.Fn strnvisx
123functions copy into
124.Fa dst
125a visual representation of
126the string
127.Fa src .
128The
129.Fn strvis
130and
131.Fn strnvis
132functions encode characters from
133.Fa src
134up to the
135first
136.Dv NUL .
137The
138.Fn strvisx
139and
140.Fn strnvisx
141functions encode exactly
142.Fa len
143characters from
144.Fa src
145(this
146is useful for encoding a block of data that may contain
147.Dv NUL Ns 's ) .
148Both forms
149.Dv NUL
150terminate
151.Fa dst .
152The size of
153.Fa dst
154must be four times the number
155of bytes encoded from
156.Fa src
157(plus one for the
158.Dv NUL ) .
159Both
160forms return the number of characters in
161.Fa dst
162(not including the trailing
163.Dv NUL ) .
164The
165.Fn stravis
166function allocates space dynamically to hold the string.
167The
168.Dq Nm n
169versions of the functions also take an additional argument
170.Fa dlen
171that indicates the length of the
172.Fa dst
173buffer.
174If
175.Fa dlen
176is not large enough to fit the converted string then the
177.Fn strnvis
178and
179.Fn strnvisx
180functions return \-1 and set
181.Va errno
182to
183.Dv ENOSPC .
184The
185.Fn strenvisx
186function takes an additional argument,
187.Fa cerr_ptr ,
188that is used to pass in and out a multibyte conversion error flag.
189This is useful when processing single characters at a time when
190it is possible that the locale may be set to something other
191than the locale of the characters in the input data.
192.Pp
193The functions
194.Fn svis ,
195.Fn snvis ,
196.Fn strsvis ,
197.Fn strsnvis ,
198.Fn strsvisx ,
199.Fn strsnvisx ,
200and
201.Fn strsenvisx
202correspond to
203.Fn vis ,
204.Fn nvis ,
205.Fn strvis ,
206.Fn strnvis ,
207.Fn strvisx ,
208.Fn strnvisx ,
209and
210.Fn strenvisx
211but have an additional argument
212.Fa extra ,
213pointing to a
214.Dv NUL
215terminated list of characters.
216These characters will be copied, encoded or backslash-escaped, into
217.Fa dst .
218These functions are useful, for example, to remove the special meaning
219that certain characters have for shells.
220.Pp
221The encoding is a unique, invertible representation composed entirely of
222graphic characters; it can be decoded back into the original form using
223the
224.Xr unvis 3 ,
225.Xr strunvis 3
226or
227.Xr strnunvis 3
228functions.
229.Pp
230There are two parameters that can be controlled: the range of
231characters that are encoded (applies only to
232.Fn vis ,
233.Fn nvis ,
234.Fn strvis ,
235.Fn strnvis ,
236.Fn strvisx ,
237and
238.Fn strnvisx ) ,
239and the type of representation used.
240By default, all non-graphic characters,
241except space, tab, and newline, are encoded (see
242.Xr isgraph 3 ) .
243The following flags
244alter this:
245.Bl -tag -width VIS_WHITEX
246.It Dv VIS_GLOB
247Also encode the magic characters
248.Ql ( * ,
249.Ql \&? ,
250.Ql \&[ ,
251and
252.Ql # )
253recognized by
254.Xr glob 3 .
255.It Dv VIS_SHELL
256Also encode the meta characters used by shells (in addition to the glob
257characters):
258.Ql ( ' ,
259.Ql ` ,
260.Ql \&" ,
261.Ql \&; ,
262.Ql & ,
263.Ql < ,
264.Ql > ,
265.Ql \&( ,
266.Ql \&) ,
267.Ql \&| ,
268.Ql \&] ,
269.Ql \e ,
270.Ql $ ,
271.Ql \&! ,
272.Ql \&^ ,
273and
274.Ql ~ ) .
275.It Dv VIS_SP
276Also encode space.
277.It Dv VIS_TAB
278Also encode tab.
279.It Dv VIS_NL
280Also encode newline.
281.It Dv VIS_WHITE
282Synonym for
283.Dv VIS_SP | VIS_TAB | VIS_NL .
284.It Dv VIS_META
285Synonym for
286.Dv VIS_WHITE | VIS_GLOB | VIS_SHELL .
287.It Dv VIS_SAFE
288Only encode
289.Dq unsafe
290characters.
291Unsafe means control characters which may cause common terminals to perform
292unexpected functions.
293Currently this form allows space, tab, newline, backspace, bell, and
294return \(em in addition to all graphic characters \(em unencoded.
295.El
296.Pp
297(The above flags have no effect for
298.Fn svis ,
299.Fn snvis ,
300.Fn strsvis ,
301.Fn strsnvis ,
302.Fn strsvisx ,
303and
304.Fn strsnvisx .
305When using these functions, place all graphic characters to be
306encoded in an array pointed to by
307.Fa extra .
308In general, the backslash character should be included in this array, see the
309warning on the use of the
310.Dv VIS_NOSLASH
311flag below).
312.Pp
313There are five forms of encoding.
314All forms use the backslash character
315.Ql \e
316to introduce a special
317sequence; two backslashes are used to represent a real backslash,
318except
319.Dv VIS_HTTPSTYLE ,
320which uses
321.Ql % ,
322and
323.Dv VIS_MIMESTYLE ,
324which uses
325.Ql = .
326These are the visual formats:
327.Bl -tag -width VIS_CSTYLE
328.It (default)
329Use an
330.Ql M
331to represent meta characters (characters with the 8th
332bit set), and use caret
333.Ql ^
334to represent control characters (see
335.Xr iscntrl 3 ) .
336The following formats are used:
337.Bl -tag -width xxxxx
338.It Dv \e^C
339Represents the control character
340.Ql C .
341Spans characters
342.Ql \e000
343through
344.Ql \e037 ,
345and
346.Ql \e177
347(as
348.Ql \e^? ) .
349.It Dv \eM-C
350Represents character
351.Ql C
352with the 8th bit set.
353Spans characters
354.Ql \e241
355through
356.Ql \e376 .
357.It Dv \eM^C
358Represents control character
359.Ql C
360with the 8th bit set.
361Spans characters
362.Ql \e200
363through
364.Ql \e237 ,
365and
366.Ql \e377
367(as
368.Ql \eM^? ) .
369.It Dv \e040
370Represents
371.Tn ASCII
372space.
373.It Dv \e240
374Represents Meta-space.
375.El
376.Pp
377.It Dv VIS_CSTYLE
378Use C-style backslash sequences to represent standard non-printable
379characters.
380The following sequences are used to represent the indicated characters:
381.Bd -unfilled -offset indent
382.Li \ea Tn  \(em BEL No (007)
383.Li \eb Tn  \(em BS No (010)
384.Li \ef Tn  \(em NP No (014)
385.Li \en Tn  \(em NL No (012)
386.Li \er Tn  \(em CR No (015)
387.Li \es Tn  \(em SP No (040)
388.Li \et Tn  \(em HT No (011)
389.Li \ev Tn  \(em VT No (013)
390.Li \e0 Tn  \(em NUL No (000)
391.Ed
392.Pp
393When using this format, the
394.Fa nextc
395parameter is looked at to determine if a
396.Dv NUL
397character can be encoded as
398.Ql \e0
399instead of
400.Ql \e000 .
401If
402.Fa nextc
403is an octal digit, the latter representation is used to
404avoid ambiguity.
405.It Dv VIS_OCTAL
406Use a three digit octal sequence.
407The form is
408.Ql \eddd
409where
410.Em d
411represents an octal digit.
412.It Dv VIS_HTTPSTYLE
413Use URI encoding as described in RFC 1738.
414The form is
415.Ql %xx
416where
417.Em x
418represents a lower case hexadecimal digit.
419.It Dv VIS_MIMESTYLE
420Use MIME Quoted-Printable encoding as described in RFC 2045, except that
421lines are not broken and CRLF is not handled.
422The form is
423.Ql =XX
424where
425.Em X
426represents an upper case hexadecimal digit.
427.El
428.Pp
429There is one additional flag,
430.Dv VIS_NOSLASH ,
431which inhibits the
432doubling of backslashes and the backslash before the default
433format (that is, control characters are represented by
434.Ql ^C
435and
436meta characters as
437.Ql M-C ) .
438With this flag set, the encoding is
439ambiguous and non-invertible.
440.Sh MULTIBYTE CHARACTER SUPPORT
441These functions support multibyte character input.
442The encoding conversion is influenced by the setting of the
443.Ev LC_CTYPE
444environment variable which defines the set of characters
445that can be copied without encoding.
446.Pp
447If
448.Dv VIS_NOLOCALE
449is set, processing is done assuming the C locale and overriding
450any other environment settings.
451.Pp
452When 8-bit data is present in the input,
453.Ev LC_CTYPE
454must be set to the correct locale or to the C locale.
455If the locales of the data and the conversion are mismatched,
456multibyte character recognition may fail and encoding will be performed
457byte-by-byte instead.
458.Pp
459As noted above,
460.Fa dst
461must be four times the number of bytes processed from
462.Fa src .
463But note that each multibyte character can be up to
464.Dv MB_LEN_MAX
465bytes
466.\" (see
467.\" .Xr multibyte 3 )
468so in terms of multibyte characters,
469.Fa dst
470must be four times
471.Dv MB_LEN_MAX
472times the number of characters processed from
473.Fa src .
474.Sh ENVIRONMENT
475.Bl -tag -width ".Ev LC_CTYPE"
476.It Ev LC_CTYPE
477Specify the locale of the input data.
478Set to C if the input data locale is unknown.
479.El
480.Sh ERRORS
481The functions
482.Fn nvis
483and
484.Fn snvis
485will return
486.Dv NULL
487and the functions
488.Fn strnvis ,
489.Fn strnvisx ,
490.Fn strsnvis ,
491and
492.Fn strsnvisx
493will return \-1 when the
494.Fa dlen
495destination buffer size is not enough to perform the conversion while
496setting
497.Va errno
498to:
499.Bl -tag -width ".Bq Er ENOSPC"
500.It Bq Er ENOSPC
501The destination buffer size is not large enough to perform the conversion.
502.El
503.Sh SEE ALSO
504.Xr unvis 1 ,
505.Xr vis 1 ,
506.Xr glob 3 ,
507.\" .Xr multibyte 3 ,
508.Xr unvis 3
509.Rs
510.%A T. Berners-Lee
511.%T Uniform Resource Locators (URL)
512.%O "RFC 1738"
513.Re
514.Rs
515.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
516.%O "RFC 2045"
517.Re
518.Sh HISTORY
519The
520.Fn vis ,
521.Fn strvis ,
522and
523.Fn strvisx
524functions first appeared in
525.Bx 4.4 .
526The
527.Fn svis ,
528.Fn strsvis ,
529and
530.Fn strsvisx
531functions appeared in
532.Nx 1.5
533and
534.Fx 9.2 .
535The buffer size limited versions of the functions
536.Po Fn nvis ,
537.Fn strnvis ,
538.Fn strnvisx ,
539.Fn snvis ,
540.Fn strsnvis ,
541and
542.Fn strsnvisx Pc
543appeared in
544.Nx 6.0
545and
546.Fx 9.2 .
547Multibyte character support was added in
548.Nx 7.0
549and
550.Fx 9.2 .
551