xref: /freebsd/contrib/libc-vis/vis.3 (revision a2aef24aa3c8458e4036735dd6928b4ef77294e5)
1.\"	$NetBSD: vis.3,v 1.45 2016/06/08 15:00:04 wiz Exp $
2.\"	$FreeBSD$
3.\"
4.\" Copyright (c) 1989, 1991, 1993
5.\"	The Regents of the University of California.  All rights reserved.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\" 3. Neither the name of the University nor the names of its contributors
16.\"    may be used to endorse or promote products derived from this software
17.\"    without specific prior written permission.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29.\" SUCH DAMAGE.
30.\"
31.\"     @(#)vis.3	8.1 (Berkeley) 6/9/93
32.\"
33.Dd January 14, 2015
34.Dt VIS 3
35.Os
36.Sh NAME
37.Nm vis ,
38.Nm nvis ,
39.Nm strvis ,
40.Nm stravis ,
41.Nm strnvis ,
42.Nm strvisx ,
43.Nm strnvisx ,
44.Nm strenvisx ,
45.Nm svis ,
46.Nm snvis ,
47.Nm strsvis ,
48.Nm strsnvis ,
49.Nm strsvisx ,
50.Nm strsnvisx ,
51.Nm strsenvisx
52.Nd visually encode characters
53.Sh LIBRARY
54.Lb libc
55.Sh SYNOPSIS
56.In vis.h
57.Ft char *
58.Fn vis "char *dst" "int c" "int flag" "int nextc"
59.Ft char *
60.Fn nvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc"
61.Ft int
62.Fn strvis "char *dst" "const char *src" "int flag"
63.Ft int
64.Fn stravis "char **dst" "const char *src" "int flag"
65.Ft int
66.Fn strnvis "char *dst" "size_t dlen" "const char *src" "int flag"
67.Ft int
68.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
69.Ft int
70.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
71.Ft int
72.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
73.Ft char *
74.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
75.Ft char *
76.Fn snvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" "const char *extra"
77.Ft int
78.Fn strsvis "char *dst" "const char *src" "int flag" "const char *extra"
79.Ft int
80.Fn strsnvis "char *dst" "size_t dlen" "const char *src" "int flag" "const char *extra"
81.Ft int
82.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
83.Ft int
84.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
85.Ft int
86.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
87.Sh DESCRIPTION
88The
89.Fn vis
90function
91copies into
92.Fa dst
93a string which represents the character
94.Fa c .
95If
96.Fa c
97needs no encoding, it is copied in unaltered.
98The string is null terminated, and a pointer to the end of the string is
99returned.
100The maximum length of any encoding is four
101bytes (not including the trailing
102.Dv NUL ) ;
103thus, when
104encoding a set of characters into a buffer, the size of the buffer should
105be four times the number of bytes encoded, plus one for the trailing
106.Dv NUL .
107The flag parameter is used for altering the default range of
108characters considered for encoding and for altering the visual
109representation.
110The additional character,
111.Fa nextc ,
112is only used when selecting the
113.Dv VIS_CSTYLE
114encoding format (explained below).
115.Pp
116The
117.Fn strvis ,
118.Fn stravis ,
119.Fn strnvis ,
120.Fn strvisx ,
121and
122.Fn strnvisx
123functions copy into
124.Fa dst
125a visual representation of
126the string
127.Fa src .
128The
129.Fn strvis
130and
131.Fn strnvis
132functions encode characters from
133.Fa src
134up to the
135first
136.Dv NUL .
137The
138.Fn strvisx
139and
140.Fn strnvisx
141functions encode exactly
142.Fa len
143characters from
144.Fa src
145(this
146is useful for encoding a block of data that may contain
147.Dv NUL Ns 's ) .
148Both forms
149.Dv NUL
150terminate
151.Fa dst .
152The size of
153.Fa dst
154must be four times the number
155of bytes encoded from
156.Fa src
157(plus one for the
158.Dv NUL ) .
159Both
160forms return the number of characters in
161.Fa dst
162(not including the trailing
163.Dv NUL ) .
164The
165.Fn stravis
166function allocates space dynamically to hold the string.
167The
168.Dq Nm n
169versions of the functions also take an additional argument
170.Fa dlen
171that indicates the length of the
172.Fa dst
173buffer.
174If
175.Fa dlen
176is not large enough to fit the converted string then the
177.Fn strnvis
178and
179.Fn strnvisx
180functions return \-1 and set
181.Va errno
182to
183.Dv ENOSPC .
184The
185.Fn strenvisx
186function takes an additional argument,
187.Fa cerr_ptr ,
188that is used to pass in and out a multibyte conversion error flag.
189This is useful when processing single characters at a time when
190it is possible that the locale may be set to something other
191than the locale of the characters in the input data.
192.Pp
193The functions
194.Fn svis ,
195.Fn snvis ,
196.Fn strsvis ,
197.Fn strsnvis ,
198.Fn strsvisx ,
199.Fn strsnvisx ,
200and
201.Fn strsenvisx
202correspond to
203.Fn vis ,
204.Fn nvis ,
205.Fn strvis ,
206.Fn strnvis ,
207.Fn strvisx ,
208.Fn strnvisx ,
209and
210.Fn strenvisx
211but have an additional argument
212.Fa extra ,
213pointing to a
214.Dv NUL
215terminated list of characters.
216These characters will be copied in encoded or backslash-escaped form into
217.Fa dst .
218These functions are useful e.g. to remove the special meaning
219of certain characters to shells.
220.Pp
221The encoding is a unique, invertible representation composed entirely of
222graphic characters; it can be decoded back into the original form using
223the
224.Xr unvis 3 ,
225.Xr strunvis 3
226or
227.Xr strnunvis 3
228functions.
229.Pp
230There are two parameters that can be controlled: the range of
231characters that are encoded (applies only to
232.Fn vis ,
233.Fn nvis ,
234.Fn strvis ,
235.Fn strnvis ,
236.Fn strvisx ,
237and
238.Fn strnvisx ) ,
239and the type of representation used.
240By default, all non-graphic characters,
241except space, tab, and newline, are encoded (see
242.Xr isgraph 3 ) .
243The following flags
244alter this:
245.Bl -tag -width VIS_WHITEX
246.It Dv VIS_GLOB
247Also encode the magic characters
248.Ql ( * ,
249.Ql \&? ,
250.Ql \&[ ,
251and
252.Ql # )
253recognized by
254.Xr glob 3 .
255.It Dv VIS_SHELL
256Also encode the meta characters used by shells (in addition to the glob
257characters):
258.Ql ( ' ,
259.Ql ` ,
260.Ql \&" ,
261.Ql \&; ,
262.Ql & ,
263.Ql < ,
264.Ql > ,
265.Ql \&( ,
266.Ql \&) ,
267.Ql \&| ,
268.Ql \&] ,
269.Ql \e ,
270.Ql $ ,
271.Ql \&! ,
272.Ql \&^ ,
273and
274.Ql ~ ) .
275.It Dv VIS_SP
276Also encode space.
277.It Dv VIS_TAB
278Also encode tab.
279.It Dv VIS_NL
280Also encode newline.
281.It Dv VIS_WHITE
282Synonym for
283.Dv VIS_SP | VIS_TAB | VIS_NL .
284.It Dv VIS_META
285Synonym for
286.Dv VIS_WHITE | VIS_GLOB | VIS_SHELL .
287.It Dv VIS_SAFE
288Only encode
289.Dq unsafe
290characters.
291Unsafe means control characters which may cause common terminals to perform
292unexpected functions.
293Currently this form allows space, tab, newline, backspace, bell, and
294return \(em in addition to all graphic characters \(em unencoded.
295.El
296.Pp
297(The above flags have no effect for
298.Fn svis ,
299.Fn snvis ,
300.Fn strsvis ,
301.Fn strsnvis ,
302.Fn strsvisx ,
303and
304.Fn strsnvisx .
305When using these functions, place all graphic characters to be
306encoded in an array pointed to by
307.Fa extra .
308In general, the backslash character should be included in this array; see the
309warning on the use of the
310.Dv VIS_NOSLASH
311flag below).
312.Pp
313There are five forms of encoding.
314All forms use the backslash character
315.Ql \e
316to introduce a special
317sequence; two backslashes are used to represent a real backslash,
318except
319.Dv VIS_HTTPSTYLE
320that uses
321.Ql % ,
322or
323.Dv VIS_MIMESTYLE
324that uses
325.Ql = .
326These are the visual formats:
327.Bl -tag -width VIS_CSTYLE
328.It (default)
329Use an
330.Ql M
331to represent meta characters (characters with the 8th
332bit set), and use caret
333.Ql ^
334to represent control characters (see
335.Xr iscntrl 3 ) .
336The following formats are used:
337.Bl -tag -width xxxxx
338.It Dv \e^C
339Represents the control character
340.Ql C .
341Spans characters
342.Ql \e000
343through
344.Ql \e037 ,
345and
346.Ql \e177
347(as
348.Ql \e^? ) .
349.It Dv \eM-C
350Represents character
351.Ql C
352with the 8th bit set.
353Spans characters
354.Ql \e241
355through
356.Ql \e376 .
357.It Dv \eM^C
358Represents control character
359.Ql C
360with the 8th bit set.
361Spans characters
362.Ql \e200
363through
364.Ql \e237 ,
365and
366.Ql \e377
367(as
368.Ql \eM^? ) .
369.It Dv \e040
370Represents
371.Tn ASCII
372space.
373.It Dv \e240
374Represents Meta-space.
375.El
376.It Dv VIS_CSTYLE
377Use C-style backslash sequences to represent standard non-printable
378characters.
379The following sequences are used to represent the indicated characters:
380.Bd -unfilled -offset indent
381.Li \ea Tn  \(em BEL No (007)
382.Li \eb Tn  \(em BS No (010)
383.Li \ef Tn  \(em NP No (014)
384.Li \en Tn  \(em NL No (012)
385.Li \er Tn  \(em CR No (015)
386.Li \es Tn  \(em SP No (040)
387.Li \et Tn  \(em HT No (011)
388.Li \ev Tn  \(em VT No (013)
389.Li \e0 Tn  \(em NUL No (000)
390.Ed
391.Pp
392When using this format, the
393.Fa nextc
394parameter is looked at to determine if a
395.Dv NUL
396character can be encoded as
397.Ql \e0
398instead of
399.Ql \e000 .
400If
401.Fa nextc
402is an octal digit, the latter representation is used to
403avoid ambiguity.
404.It Dv VIS_OCTAL
405Use a three digit octal sequence.
406The form is
407.Ql \eddd
408where
409.Em d
410represents an octal digit.
411.It Dv VIS_HTTPSTYLE
412Use URI encoding as described in RFC 1738.
413The form is
414.Ql %xx
415where
416.Em x
417represents a lower case hexadecimal digit.
418.It Dv VIS_MIMESTYLE
419Use MIME Quoted-Printable encoding as described in RFC 2045, except that
420lines are not broken and CRLF is not handled.
421The form is
422.Ql =XX
423where
424.Em X
425represents an upper case hexadecimal digit.
426.El
427.Pp
428There is one additional flag,
429.Dv VIS_NOSLASH ,
430which inhibits the
431doubling of backslashes and the backslash before the default
432format (that is, control characters are represented by
433.Ql ^C
434and
435meta characters as
436.Ql M-C ) .
437With this flag set, the encoding is
438ambiguous and non-invertible.
439.Sh MULTIBYTE CHARACTER SUPPORT
440These functions support multibyte character input.
441The encoding conversion is influenced by the setting of the
442.Ev LC_CTYPE
443environment variable which defines the set of characters
444that can be copied without encoding.
445.Pp
446If
447.Dv VIS_NOLOCALE
448is set, processing is done assuming the C locale and overriding
449any other environment settings.
450.Pp
451When 8-bit data is present in the input,
452.Ev LC_CTYPE
453must be set to the correct locale or to the C locale.
454If the locales of the data and the conversion are mismatched,
455multibyte character recognition may fail and encoding will be performed
456byte-by-byte instead.
457.Pp
458As noted above,
459.Fa dst
460must be four times the number of bytes processed from
461.Fa src .
462But note that each multibyte character can be up to
463.Dv MB_LEN_MAX
464bytes
465.\" (see
466.\" .Xr multibyte 3 )
467so in terms of multibyte characters,
468.Fa dst
469must be four times
470.Dv MB_LEN_MAX
471times the number of characters processed from
472.Fa src .
473.Sh ENVIRONMENT
474.Bl -tag -width ".Ev LC_CTYPE"
475.It Ev LC_CTYPE
476Specify the locale of the input data.
477Set to C if the input data locale is unknown.
478.El
479.Sh ERRORS
480The functions
481.Fn nvis
482and
483.Fn snvis
484will return
485.Dv NULL
486and the functions
487.Fn strnvis ,
488.Fn strnvisx ,
489.Fn strsnvis ,
490and
491.Fn strsnvisx
492will return \-1 when the
493.Fa dlen
494destination buffer size is not enough to perform the conversion while
495setting
496.Va errno
497to:
498.Bl -tag -width ".Bq Er ENOSPC"
499.It Bq Er ENOSPC
500The destination buffer size is not large enough to perform the conversion.
501.El
502.Sh SEE ALSO
503.Xr unvis 1 ,
504.Xr vis 1 ,
505.Xr glob 3 ,
506.\" .Xr multibyte 3 ,
507.Xr unvis 3
508.Rs
509.%A T. Berners-Lee
510.%T Uniform Resource Locators (URL)
511.%O "RFC 1738"
512.Re
513.Rs
514.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
515.%O "RFC 2045"
516.Re
517.Sh HISTORY
518The
519.Fn vis ,
520.Fn strvis ,
521and
522.Fn strvisx
523functions first appeared in
524.Bx 4.4 .
525The
526.Fn svis ,
527.Fn strsvis ,
528and
529.Fn strsvisx
530functions appeared in
531.Nx 1.5
532and
533.Fx 9.2 .
534The buffer size limited versions of the functions
535.Po Fn nvis ,
536.Fn strnvis ,
537.Fn strnvisx ,
538.Fn snvis ,
539.Fn strsnvis ,
540and
541.Fn strsnvisx Pc
542appeared in
543.Nx 6.0
544and
545.Fx 9.2 .
546Multibyte character support was added in
547.Nx 7.0
548and
549.Fx 9.2 .
550