xref: /freebsd/contrib/libc-vis/vis.c (revision ac25e23856887e4ebeef6e7da98131ad270bd38a)
1*ac25e238SBrooks Davis /*	$NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $	*/
28ccca122SBrooks Davis 
38ccca122SBrooks Davis /*-
48ccca122SBrooks Davis  * Copyright (c) 1989, 1993
58ccca122SBrooks Davis  *	The Regents of the University of California.  All rights reserved.
68ccca122SBrooks Davis  *
78ccca122SBrooks Davis  * Redistribution and use in source and binary forms, with or without
88ccca122SBrooks Davis  * modification, are permitted provided that the following conditions
98ccca122SBrooks Davis  * are met:
108ccca122SBrooks Davis  * 1. Redistributions of source code must retain the above copyright
118ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer.
128ccca122SBrooks Davis  * 2. Redistributions in binary form must reproduce the above copyright
138ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer in the
148ccca122SBrooks Davis  *    documentation and/or other materials provided with the distribution.
158ccca122SBrooks Davis  * 3. Neither the name of the University nor the names of its contributors
168ccca122SBrooks Davis  *    may be used to endorse or promote products derived from this software
178ccca122SBrooks Davis  *    without specific prior written permission.
188ccca122SBrooks Davis  *
198ccca122SBrooks Davis  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
208ccca122SBrooks Davis  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
218ccca122SBrooks Davis  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
228ccca122SBrooks Davis  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
238ccca122SBrooks Davis  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
248ccca122SBrooks Davis  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
258ccca122SBrooks Davis  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
268ccca122SBrooks Davis  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
278ccca122SBrooks Davis  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
288ccca122SBrooks Davis  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
298ccca122SBrooks Davis  * SUCH DAMAGE.
308ccca122SBrooks Davis  */
318ccca122SBrooks Davis 
328ccca122SBrooks Davis /*-
338ccca122SBrooks Davis  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
348ccca122SBrooks Davis  * All rights reserved.
358ccca122SBrooks Davis  *
368ccca122SBrooks Davis  * Redistribution and use in source and binary forms, with or without
378ccca122SBrooks Davis  * modification, are permitted provided that the following conditions
388ccca122SBrooks Davis  * are met:
398ccca122SBrooks Davis  * 1. Redistributions of source code must retain the above copyright
408ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer.
418ccca122SBrooks Davis  * 2. Redistributions in binary form must reproduce the above copyright
428ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer in the
438ccca122SBrooks Davis  *    documentation and/or other materials provided with the distribution.
448ccca122SBrooks Davis  *
458ccca122SBrooks Davis  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
468ccca122SBrooks Davis  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
478ccca122SBrooks Davis  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
488ccca122SBrooks Davis  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
498ccca122SBrooks Davis  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
508ccca122SBrooks Davis  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
518ccca122SBrooks Davis  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
528ccca122SBrooks Davis  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
538ccca122SBrooks Davis  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
548ccca122SBrooks Davis  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
558ccca122SBrooks Davis  * POSSIBILITY OF SUCH DAMAGE.
568ccca122SBrooks Davis  */
578ccca122SBrooks Davis 
588ccca122SBrooks Davis #include <sys/cdefs.h>
598ccca122SBrooks Davis #if defined(LIBC_SCCS) && !defined(lint)
60*ac25e238SBrooks Davis __RCSID("$NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $");
618ccca122SBrooks Davis #endif /* LIBC_SCCS and not lint */
62778c12a6SBrooks Davis #ifdef __FBSDID
638ccca122SBrooks Davis __FBSDID("$FreeBSD$");
64778c12a6SBrooks Davis #define	_DIAGASSERT(x)	assert(x)
65778c12a6SBrooks Davis #endif
668ccca122SBrooks Davis 
678ccca122SBrooks Davis #include "namespace.h"
688ccca122SBrooks Davis #include <sys/types.h>
69778c12a6SBrooks Davis #include <sys/param.h>
708ccca122SBrooks Davis 
718ccca122SBrooks Davis #include <assert.h>
728ccca122SBrooks Davis #include <vis.h>
738ccca122SBrooks Davis #include <errno.h>
748ccca122SBrooks Davis #include <stdlib.h>
75778c12a6SBrooks Davis #include <wchar.h>
76778c12a6SBrooks Davis #include <wctype.h>
778ccca122SBrooks Davis 
788ccca122SBrooks Davis #ifdef __weak_alias
798ccca122SBrooks Davis __weak_alias(strvisx,_strvisx)
808ccca122SBrooks Davis #endif
818ccca122SBrooks Davis 
828ccca122SBrooks Davis #if !HAVE_VIS || !HAVE_SVIS
838ccca122SBrooks Davis #include <ctype.h>
848ccca122SBrooks Davis #include <limits.h>
858ccca122SBrooks Davis #include <stdio.h>
868ccca122SBrooks Davis #include <string.h>
878ccca122SBrooks Davis 
/*
 * The reason for going to the trouble of dealing with character encodings
 * in vis(3) is that we use it to safely encode the output of commands. This
 * safe encoding varies depending on the character set. For example, if we
 * display ps output in French, we don't want to display French characters
 * as M-foo.
 */
95778c12a6SBrooks Davis 
96778c12a6SBrooks Davis static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
978ccca122SBrooks Davis 
#undef BELL
#define BELL L'\a'

/*
 * Small character-class and digit helpers used by the encoders below.
 * Every macro argument and every expansion is fully parenthesized so the
 * macros stay correct when handed an arbitrary expression.
 *
 * iswoctal(c): true when the low eight bits of c are an octal digit.
 * iswwhite(c): true for space, tab and newline.
 * iswsafe(c):  true for backspace, bell and carriage return.
 * xtoa(c):     lowercase hex digit for a value in the range 0..15.
 * XTOA(c):     uppercase hex digit for a value in the range 0..15.
 */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])

/* Slack, in wide characters, reserved past the caller's extra list. */
#define MAXEXTRAS	10
1088ccca122SBrooks Davis 
109778c12a6SBrooks Davis #if !HAVE_NBTOOL_CONFIG_H
110778c12a6SBrooks Davis #ifndef __NetBSD__
/*
 * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
 * integral type, and it is probably wrong, since currently the maximum
 * number of bytes a character needs is 6. Until this is fixed, the
 * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
 * the assertion is commented out.
 */
118778c12a6SBrooks Davis #ifdef __FreeBSD__
119778c12a6SBrooks Davis /*
120778c12a6SBrooks Davis  * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
121778c12a6SBrooks Davis  * mode.
122778c12a6SBrooks Davis  */
123778c12a6SBrooks Davis #ifndef CTASSERT
124778c12a6SBrooks Davis #define CTASSERT(x)             _CTASSERT(x, __LINE__)
125778c12a6SBrooks Davis #define _CTASSERT(x, y)         __CTASSERT(x, y)
126778c12a6SBrooks Davis #define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
127778c12a6SBrooks Davis #endif
128778c12a6SBrooks Davis #endif /* __FreeBSD__ */
129778c12a6SBrooks Davis CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
130778c12a6SBrooks Davis #endif /* !__NetBSD__ */
131778c12a6SBrooks Davis #endif
1328ccca122SBrooks Davis 
/*
 * do_hvis - encode one character in HTTP style (RFC 1808).
 *
 * Alphanumerics plus the "safe" ($ - _ . +) and "extra" (! * ' ( ) ,)
 * punctuation are handed on to do_svis().  Anything else is written as
 * '%' followed by two lowercase hex digits built from the low eight bits
 * of c.  Returns the advanced destination pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int low;
	int passthru;

	passthru = iswalnum(c) != 0;
	if (!passthru) {
		switch (c) {
		/* safe */
		case L'$':
		case L'-':
		case L'_':
		case L'.':
		case L'+':
		/* extra */
		case L'!':
		case L'*':
		case L'\'':
		case L'(':
		case L')':
		case L',':
			passthru = 1;
			break;
		default:
			break;
		}
	}

	if (passthru)
		return do_svis(dst, c, flags, nextc, extra);

	low = (unsigned int)c;
	*dst++ = L'%';
	*dst++ = xtoa((low >> 4) & 0xf);
	*dst++ = xtoa(low & 0xf);
	return dst;
}
1548ccca122SBrooks Davis 
/*
 * do_mvis - encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A character other than newline is written as '=' plus two uppercase hex
 * digits (low eight bits of c) when it is whitespace directly before a CR
 * or LF, when it is non-whitespace outside 33..126 or equal to 61 ('='),
 * or when wcschr() finds it in the fixed list below.  Everything else is
 * passed to do_svis().  Returns the advanced destination pointer.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int low;
	int quote = 0;

	if (c != L'\n') {
		if (iswspace(c)) {
			/* Space at the end of the line */
			if (nextc == L'\r' || nextc == L'\n')
				quote = 1;
		} else if (c < 33 || c == 61 || c > 126) {
			/* Out of range */
			quote = 1;
		}
		/* Specific char to be escaped */
		if (!quote && wcschr(L"#$@[\\]^`{|}~", c) != NULL)
			quote = 1;
	}

	if (!quote)
		return do_svis(dst, c, flags, nextc, extra);

	low = (unsigned int)c;
	*dst++ = L'=';
	*dst++ = XTOA((low >> 4) & 0xf);
	*dst++ = XTOA(low & 0xf);
	return dst;
}
176778c12a6SBrooks Davis 
177778c12a6SBrooks Davis /*
178778c12a6SBrooks Davis  * Output single byte of multibyte character.
179778c12a6SBrooks Davis  */
180778c12a6SBrooks Davis static wchar_t *
181778c12a6SBrooks Davis do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
182778c12a6SBrooks Davis {
183778c12a6SBrooks Davis 	if (flags & VIS_CSTYLE) {
184778c12a6SBrooks Davis 		switch (c) {
185778c12a6SBrooks Davis 		case L'\n':
186778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'n';
187778c12a6SBrooks Davis 			return dst;
188778c12a6SBrooks Davis 		case L'\r':
189778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'r';
190778c12a6SBrooks Davis 			return dst;
191778c12a6SBrooks Davis 		case L'\b':
192778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'b';
193778c12a6SBrooks Davis 			return dst;
194778c12a6SBrooks Davis 		case BELL:
195778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'a';
196778c12a6SBrooks Davis 			return dst;
197778c12a6SBrooks Davis 		case L'\v':
198778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'v';
199778c12a6SBrooks Davis 			return dst;
200778c12a6SBrooks Davis 		case L'\t':
201778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L't';
202778c12a6SBrooks Davis 			return dst;
203778c12a6SBrooks Davis 		case L'\f':
204778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'f';
205778c12a6SBrooks Davis 			return dst;
206778c12a6SBrooks Davis 		case L' ':
207778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L's';
208778c12a6SBrooks Davis 			return dst;
209778c12a6SBrooks Davis 		case L'\0':
210778c12a6SBrooks Davis 			*dst++ = L'\\'; *dst++ = L'0';
211778c12a6SBrooks Davis 			if (iswoctal(nextc)) {
212778c12a6SBrooks Davis 				*dst++ = L'0';
213778c12a6SBrooks Davis 				*dst++ = L'0';
214778c12a6SBrooks Davis 			}
215778c12a6SBrooks Davis 			return dst;
216778c12a6SBrooks Davis 		default:
217778c12a6SBrooks Davis 			if (iswgraph(c)) {
218778c12a6SBrooks Davis 				*dst++ = L'\\';
219778c12a6SBrooks Davis 				*dst++ = c;
220778c12a6SBrooks Davis 				return dst;
221778c12a6SBrooks Davis 			}
222778c12a6SBrooks Davis 		}
223778c12a6SBrooks Davis 	}
224778c12a6SBrooks Davis 	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
225778c12a6SBrooks Davis 		*dst++ = L'\\';
226778c12a6SBrooks Davis 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
227778c12a6SBrooks Davis 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
228778c12a6SBrooks Davis 		*dst++ =			     (c	      & 07) + L'0';
229778c12a6SBrooks Davis 	} else {
230778c12a6SBrooks Davis 		if ((flags & VIS_NOSLASH) == 0)
231778c12a6SBrooks Davis 			*dst++ = L'\\';
232778c12a6SBrooks Davis 
233778c12a6SBrooks Davis 		if (c & 0200) {
234778c12a6SBrooks Davis 			c &= 0177;
235778c12a6SBrooks Davis 			*dst++ = L'M';
236778c12a6SBrooks Davis 		}
237778c12a6SBrooks Davis 
238778c12a6SBrooks Davis 		if (iswcntrl(c)) {
239778c12a6SBrooks Davis 			*dst++ = L'^';
240778c12a6SBrooks Davis 			if (c == 0177)
241778c12a6SBrooks Davis 				*dst++ = L'?';
242778c12a6SBrooks Davis 			else
243778c12a6SBrooks Davis 				*dst++ = c + L'@';
244778c12a6SBrooks Davis 		} else {
245778c12a6SBrooks Davis 			*dst++ = L'-';
246778c12a6SBrooks Davis 			*dst++ = c;
247778c12a6SBrooks Davis 		}
248778c12a6SBrooks Davis 	}
249778c12a6SBrooks Davis 
2508ccca122SBrooks Davis 	return dst;
2518ccca122SBrooks Davis }
2528ccca122SBrooks Davis 
2538ccca122SBrooks Davis /*
2548ccca122SBrooks Davis  * This is do_vis, the central code of vis.
2558ccca122SBrooks Davis  * dst:	      Pointer to the destination buffer
2568ccca122SBrooks Davis  * c:	      Character to encode
257778c12a6SBrooks Davis  * flags:     Flags word
2588ccca122SBrooks Davis  * nextc:     The character following 'c'
2598ccca122SBrooks Davis  * extra:     Pointer to the list of extra characters to be
2608ccca122SBrooks Davis  *	      backslash-protected.
2618ccca122SBrooks Davis  */
262778c12a6SBrooks Davis static wchar_t *
263778c12a6SBrooks Davis do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
2648ccca122SBrooks Davis {
265778c12a6SBrooks Davis 	int iswextra, i, shft;
266778c12a6SBrooks Davis 	uint64_t bmsk, wmsk;
2678ccca122SBrooks Davis 
268778c12a6SBrooks Davis 	iswextra = wcschr(extra, c) != NULL;
269778c12a6SBrooks Davis 	if (!iswextra && (iswgraph(c) || iswwhite(c) ||
270778c12a6SBrooks Davis 	    ((flags & VIS_SAFE) && iswsafe(c)))) {
2718ccca122SBrooks Davis 		*dst++ = c;
2728ccca122SBrooks Davis 		return dst;
2738ccca122SBrooks Davis 	}
274778c12a6SBrooks Davis 
275778c12a6SBrooks Davis 	/* See comment in istrsenvisx() output loop, below. */
276778c12a6SBrooks Davis 	wmsk = 0;
277778c12a6SBrooks Davis 	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
278778c12a6SBrooks Davis 		shft = i * NBBY;
279778c12a6SBrooks Davis 		bmsk = (uint64_t)0xffLL << shft;
280778c12a6SBrooks Davis 		wmsk |= bmsk;
281778c12a6SBrooks Davis 		if ((c & wmsk) || i == 0)
282778c12a6SBrooks Davis 			dst = do_mbyte(dst, (wint_t)(
283778c12a6SBrooks Davis 			    (uint64_t)(c & bmsk) >> shft),
284778c12a6SBrooks Davis 			    flags, nextc, iswextra);
2858ccca122SBrooks Davis 	}
2868ccca122SBrooks Davis 
2878ccca122SBrooks Davis 	return dst;
2888ccca122SBrooks Davis }
2898ccca122SBrooks Davis 
290778c12a6SBrooks Davis typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
2918ccca122SBrooks Davis 
2928ccca122SBrooks Davis /*
2938ccca122SBrooks Davis  * Return the appropriate encoding function depending on the flags given.
2948ccca122SBrooks Davis  */
2958ccca122SBrooks Davis static visfun_t
296778c12a6SBrooks Davis getvisfun(int flags)
2978ccca122SBrooks Davis {
298778c12a6SBrooks Davis 	if (flags & VIS_HTTPSTYLE)
2998ccca122SBrooks Davis 		return do_hvis;
300778c12a6SBrooks Davis 	if (flags & VIS_MIMESTYLE)
3018ccca122SBrooks Davis 		return do_mvis;
3028ccca122SBrooks Davis 	return do_svis;
3038ccca122SBrooks Davis }
3048ccca122SBrooks Davis 
3058ccca122SBrooks Davis /*
306778c12a6SBrooks Davis  * Expand list of extra characters to not visually encode.
3078ccca122SBrooks Davis  */
308778c12a6SBrooks Davis static wchar_t *
309778c12a6SBrooks Davis makeextralist(int flags, const char *src)
3108ccca122SBrooks Davis {
311778c12a6SBrooks Davis 	wchar_t *dst, *d;
312778c12a6SBrooks Davis 	size_t len;
3138ccca122SBrooks Davis 
314778c12a6SBrooks Davis 	len = strlen(src);
315778c12a6SBrooks Davis 	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
3168ccca122SBrooks Davis 		return NULL;
317778c12a6SBrooks Davis 
318778c12a6SBrooks Davis 	if (mbstowcs(dst, src, len) == (size_t)-1) {
319778c12a6SBrooks Davis 		size_t i;
320778c12a6SBrooks Davis 		for (i = 0; i < len; i++)
321778c12a6SBrooks Davis 			dst[i] = (wint_t)(u_char)src[i];
322778c12a6SBrooks Davis 		d = dst + len;
323778c12a6SBrooks Davis 	} else
324778c12a6SBrooks Davis 		d = dst + wcslen(dst);
325778c12a6SBrooks Davis 
326778c12a6SBrooks Davis 	if (flags & VIS_GLOB) {
327778c12a6SBrooks Davis 		*d++ = L'*';
328778c12a6SBrooks Davis 		*d++ = L'?';
329778c12a6SBrooks Davis 		*d++ = L'[';
330778c12a6SBrooks Davis 		*d++ = L'#';
3318ccca122SBrooks Davis 	}
332778c12a6SBrooks Davis 
333778c12a6SBrooks Davis 	if (flags & VIS_SP) *d++ = L' ';
334778c12a6SBrooks Davis 	if (flags & VIS_TAB) *d++ = L'\t';
335778c12a6SBrooks Davis 	if (flags & VIS_NL) *d++ = L'\n';
336778c12a6SBrooks Davis 	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
337778c12a6SBrooks Davis 	*d = L'\0';
338778c12a6SBrooks Davis 
3398ccca122SBrooks Davis 	return dst;
3408ccca122SBrooks Davis }
3418ccca122SBrooks Davis 
3428ccca122SBrooks Davis /*
343778c12a6SBrooks Davis  * istrsenvisx()
344778c12a6SBrooks Davis  * 	The main internal function.
345778c12a6SBrooks Davis  *	All user-visible functions call this one.
3468ccca122SBrooks Davis  */
3478ccca122SBrooks Davis static int
348778c12a6SBrooks Davis istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
349778c12a6SBrooks Davis     int flags, const char *mbextra, int *cerr_ptr)
3508ccca122SBrooks Davis {
351778c12a6SBrooks Davis 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
352778c12a6SBrooks Davis 	size_t len, olen;
353778c12a6SBrooks Davis 	uint64_t bmsk, wmsk;
354778c12a6SBrooks Davis 	wint_t c;
3558ccca122SBrooks Davis 	visfun_t f;
356778c12a6SBrooks Davis 	int clen = 0, cerr = 0, error = -1, i, shft;
357778c12a6SBrooks Davis 	ssize_t mbslength, maxolen;
3588ccca122SBrooks Davis 
359778c12a6SBrooks Davis 	_DIAGASSERT(mbdst != NULL);
360*ac25e238SBrooks Davis 	_DIAGASSERT(mbsrc != NULL || mblength == 0);
361778c12a6SBrooks Davis 	_DIAGASSERT(mbextra != NULL);
362778c12a6SBrooks Davis 
363778c12a6SBrooks Davis 	/*
364778c12a6SBrooks Davis 	 * Input (mbsrc) is a char string considered to be multibyte
365778c12a6SBrooks Davis 	 * characters.  The input loop will read this string pulling
366778c12a6SBrooks Davis 	 * one character, possibly multiple bytes, from mbsrc and
367778c12a6SBrooks Davis 	 * converting each to wchar_t in src.
368778c12a6SBrooks Davis 	 *
369778c12a6SBrooks Davis 	 * The vis conversion will be done using the wide char
370778c12a6SBrooks Davis 	 * wchar_t string.
371778c12a6SBrooks Davis 	 *
372778c12a6SBrooks Davis 	 * This will then be converted back to a multibyte string to
373778c12a6SBrooks Davis 	 * return to the caller.
374778c12a6SBrooks Davis 	 */
375778c12a6SBrooks Davis 
376778c12a6SBrooks Davis 	/* Allocate space for the wide char strings */
377778c12a6SBrooks Davis 	psrc = pdst = extra = NULL;
378778c12a6SBrooks Davis 	if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
3798ccca122SBrooks Davis 		return -1;
380778c12a6SBrooks Davis 	if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
381778c12a6SBrooks Davis 		goto out;
382778c12a6SBrooks Davis 	dst = pdst;
383778c12a6SBrooks Davis 	src = psrc;
384778c12a6SBrooks Davis 
385778c12a6SBrooks Davis 	/* Use caller's multibyte conversion error flag. */
386778c12a6SBrooks Davis 	if (cerr_ptr)
387778c12a6SBrooks Davis 		cerr = *cerr_ptr;
388778c12a6SBrooks Davis 
389778c12a6SBrooks Davis 	/*
390778c12a6SBrooks Davis 	 * Input loop.
391778c12a6SBrooks Davis 	 * Handle up to mblength characters (not bytes).  We do not
392778c12a6SBrooks Davis 	 * stop at NULs because we may be processing a block of data
393778c12a6SBrooks Davis 	 * that includes NULs.
394778c12a6SBrooks Davis 	 */
395778c12a6SBrooks Davis 	mbslength = (ssize_t)mblength;
396778c12a6SBrooks Davis 	/*
397778c12a6SBrooks Davis 	 * When inputing a single character, must also read in the
398778c12a6SBrooks Davis 	 * next character for nextc, the look-ahead character.
399778c12a6SBrooks Davis 	 */
400778c12a6SBrooks Davis 	if (mbslength == 1)
401778c12a6SBrooks Davis 		mbslength++;
402778c12a6SBrooks Davis 	while (mbslength > 0) {
403778c12a6SBrooks Davis 		/* Convert one multibyte character to wchar_t. */
404778c12a6SBrooks Davis 		if (!cerr)
405778c12a6SBrooks Davis 			clen = mbtowc(src, mbsrc, MB_LEN_MAX);
406778c12a6SBrooks Davis 		if (cerr || clen < 0) {
407778c12a6SBrooks Davis 			/* Conversion error, process as a byte instead. */
408778c12a6SBrooks Davis 			*src = (wint_t)(u_char)*mbsrc;
409778c12a6SBrooks Davis 			clen = 1;
410778c12a6SBrooks Davis 			cerr = 1;
4118ccca122SBrooks Davis 		}
412778c12a6SBrooks Davis 		if (clen == 0)
413778c12a6SBrooks Davis 			/*
414778c12a6SBrooks Davis 			 * NUL in input gives 0 return value. process
415778c12a6SBrooks Davis 			 * as single NUL byte and keep going.
416778c12a6SBrooks Davis 			 */
417778c12a6SBrooks Davis 			clen = 1;
418778c12a6SBrooks Davis 		/* Advance buffer character pointer. */
419778c12a6SBrooks Davis 		src++;
420778c12a6SBrooks Davis 		/* Advance input pointer by number of bytes read. */
421778c12a6SBrooks Davis 		mbsrc += clen;
422778c12a6SBrooks Davis 		/* Decrement input byte count. */
423778c12a6SBrooks Davis 		mbslength -= clen;
4248ccca122SBrooks Davis 	}
425778c12a6SBrooks Davis 	len = src - psrc;
426778c12a6SBrooks Davis 	src = psrc;
427778c12a6SBrooks Davis 	/*
428778c12a6SBrooks Davis 	 * In the single character input case, we will have actually
429778c12a6SBrooks Davis 	 * processed two characters, c and nextc.  Reset len back to
430778c12a6SBrooks Davis 	 * just a single character.
431778c12a6SBrooks Davis 	 */
432778c12a6SBrooks Davis 	if (mblength < len)
433778c12a6SBrooks Davis 		len = mblength;
434778c12a6SBrooks Davis 
435778c12a6SBrooks Davis 	/* Convert extra argument to list of characters for this mode. */
436778c12a6SBrooks Davis 	extra = makeextralist(flags, mbextra);
437778c12a6SBrooks Davis 	if (!extra) {
4388ccca122SBrooks Davis 		if (dlen && *dlen == 0) {
4398ccca122SBrooks Davis 			errno = ENOSPC;
440778c12a6SBrooks Davis 			goto out;
4418ccca122SBrooks Davis 		}
442778c12a6SBrooks Davis 		*mbdst = '\0';		/* can't create extra, return "" */
443778c12a6SBrooks Davis 		error = 0;
444778c12a6SBrooks Davis 		goto out;
4458ccca122SBrooks Davis 	}
4468ccca122SBrooks Davis 
447778c12a6SBrooks Davis 	/* Look up which processing function to call. */
448778c12a6SBrooks Davis 	f = getvisfun(flags);
4498ccca122SBrooks Davis 
450778c12a6SBrooks Davis 	/*
451778c12a6SBrooks Davis 	 * Main processing loop.
452778c12a6SBrooks Davis 	 * Call do_Xvis processing function one character at a time
453778c12a6SBrooks Davis 	 * with next character available for look-ahead.
454778c12a6SBrooks Davis 	 */
4558ccca122SBrooks Davis 	for (start = dst; len > 0; len--) {
4568ccca122SBrooks Davis 		c = *src++;
457778c12a6SBrooks Davis 		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
4588ccca122SBrooks Davis 		if (dst == NULL) {
4598ccca122SBrooks Davis 			errno = ENOSPC;
460778c12a6SBrooks Davis 			goto out;
4618ccca122SBrooks Davis 		}
4628ccca122SBrooks Davis 	}
463778c12a6SBrooks Davis 
464778c12a6SBrooks Davis 	/* Terminate the string in the buffer. */
465778c12a6SBrooks Davis 	*dst = L'\0';
466778c12a6SBrooks Davis 
467778c12a6SBrooks Davis 	/*
468778c12a6SBrooks Davis 	 * Output loop.
469778c12a6SBrooks Davis 	 * Convert wchar_t string back to multibyte output string.
470778c12a6SBrooks Davis 	 * If we have hit a multi-byte conversion error on input,
471778c12a6SBrooks Davis 	 * output byte-by-byte here.  Else use wctomb().
472778c12a6SBrooks Davis 	 */
473778c12a6SBrooks Davis 	len = wcslen(start);
474778c12a6SBrooks Davis 	maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
475778c12a6SBrooks Davis 	olen = 0;
476778c12a6SBrooks Davis 	for (dst = start; len > 0; len--) {
477778c12a6SBrooks Davis 		if (!cerr)
478778c12a6SBrooks Davis 			clen = wctomb(mbdst, *dst);
479778c12a6SBrooks Davis 		if (cerr || clen < 0) {
480778c12a6SBrooks Davis 			/*
481778c12a6SBrooks Davis 			 * Conversion error, process as a byte(s) instead.
482778c12a6SBrooks Davis 			 * Examine each byte and higher-order bytes for
483778c12a6SBrooks Davis 			 * data.  E.g.,
484778c12a6SBrooks Davis 			 *	0x000000000000a264 -> a2 64
485778c12a6SBrooks Davis 			 *	0x000000001f00a264 -> 1f 00 a2 64
486778c12a6SBrooks Davis 			 */
487778c12a6SBrooks Davis 			clen = 0;
488778c12a6SBrooks Davis 			wmsk = 0;
489778c12a6SBrooks Davis 			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
490778c12a6SBrooks Davis 				shft = i * NBBY;
491778c12a6SBrooks Davis 				bmsk = (uint64_t)0xffLL << shft;
492778c12a6SBrooks Davis 				wmsk |= bmsk;
493778c12a6SBrooks Davis 				if ((*dst & wmsk) || i == 0)
494778c12a6SBrooks Davis 					mbdst[clen++] = (char)(
495778c12a6SBrooks Davis 					    (uint64_t)(*dst & bmsk) >>
496778c12a6SBrooks Davis 					    shft);
4978ccca122SBrooks Davis 			}
498778c12a6SBrooks Davis 			cerr = 1;
499778c12a6SBrooks Davis 		}
500778c12a6SBrooks Davis 		/* If this character would exceed our output limit, stop. */
501778c12a6SBrooks Davis 		if (olen + clen > (size_t)maxolen)
502778c12a6SBrooks Davis 			break;
503778c12a6SBrooks Davis 		/* Advance output pointer by number of bytes written. */
504778c12a6SBrooks Davis 		mbdst += clen;
505778c12a6SBrooks Davis 		/* Advance buffer character pointer. */
506778c12a6SBrooks Davis 		dst++;
507778c12a6SBrooks Davis 		/* Incrment output character count. */
508778c12a6SBrooks Davis 		olen += clen;
509778c12a6SBrooks Davis 	}
510778c12a6SBrooks Davis 
511778c12a6SBrooks Davis 	/* Terminate the output string. */
512778c12a6SBrooks Davis 	*mbdst = '\0';
513778c12a6SBrooks Davis 
514778c12a6SBrooks Davis 	/* Pass conversion error flag out. */
515778c12a6SBrooks Davis 	if (cerr_ptr)
516778c12a6SBrooks Davis 		*cerr_ptr = cerr;
517778c12a6SBrooks Davis 
518778c12a6SBrooks Davis 	free(extra);
519778c12a6SBrooks Davis 	free(pdst);
520778c12a6SBrooks Davis 	free(psrc);
521778c12a6SBrooks Davis 
522778c12a6SBrooks Davis 	return (int)olen;
523778c12a6SBrooks Davis out:
524778c12a6SBrooks Davis 	free(extra);
525778c12a6SBrooks Davis 	free(pdst);
526778c12a6SBrooks Davis 	free(psrc);
527778c12a6SBrooks Davis 	return error;
528778c12a6SBrooks Davis }
529*ac25e238SBrooks Davis 
/*
 * istrsenvisxl - istrsenvisx() for a NUL-terminated source string.
 * The length passed on is strlen(mbsrc), or 0 when mbsrc is NULL.
 */
static int
istrsenvisxl(char *mbdst, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t srclen = 0;

	if (mbsrc != NULL)
		srclen = strlen(mbsrc);
	return istrsenvisx(mbdst, dlen, mbsrc, srclen, flags, mbextra,
	    cerr_ptr);
}
537*ac25e238SBrooks Davis 
538778c12a6SBrooks Davis #endif
539778c12a6SBrooks Davis 
540778c12a6SBrooks Davis #if !HAVE_SVIS
/*
 *	The "svis" variants all take an "extra" arg that is a pointer
 *	to a NUL-terminated list of characters to be encoded, too.
 *	These functions are useful, e.g., to encode strings in such a
 *	way that they are not interpreted by a shell.
 */
547778c12a6SBrooks Davis 
/*
 * svis - encode the single character c (with look-ahead nextc) into mbdst,
 * also encoding any character listed in mbextra.  No output size limit is
 * applied.  Returns a pointer just past the last byte written, or NULL on
 * failure.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int written;

	written = istrsenvisx(mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return written < 0 ? NULL : mbdst + written;
}
562778c12a6SBrooks Davis 
/*
 * snvis - like svis(), but the output is limited to a buffer of dlen
 * bytes.  Returns a pointer just past the last byte written, or NULL on
 * failure.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int written;

	written = istrsenvisx(mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return written < 0 ? NULL : mbdst + written;
}
5778ccca122SBrooks Davis 
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst, also
 * encoding the characters in mbextra.  No output size limit is applied.
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	int rv;

	rv = istrsenvisxl(mbdst, NULL, mbsrc, flags, mbextra, NULL);
	return rv;
}
5838ccca122SBrooks Davis 
/*
 * strsnvis - like strsvis(), with the output limited to dlen bytes.
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	int rv;

	rv = istrsenvisxl(mbdst, &dlen, mbsrc, flags, mbextra, NULL);
	return rv;
}
589778c12a6SBrooks Davis 
/*
 * strsvisx - encode len bytes of mbsrc into mbdst, also encoding the
 * characters in mbextra.  No output size limit is applied.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	int rv;

	rv = istrsenvisx(mbdst, NULL, mbsrc, len, flags, mbextra, NULL);
	return rv;
}
595778c12a6SBrooks Davis 
/*
 * strsnvisx - like strsvisx(), with the output limited to dlen bytes.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	int rv;

	rv = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, NULL);
	return rv;
}
602778c12a6SBrooks Davis 
/*
 * strsenvisx - like strsnvisx(), additionally passing the multibyte
 * conversion error flag in and out through cerr_ptr.
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	int rv;

	rv = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, cerr_ptr);
	return rv;
}
6098ccca122SBrooks Davis #endif
6108ccca122SBrooks Davis 
6118ccca122SBrooks Davis #if !HAVE_VIS
/*
 * vis - visually encode characters
 *
 * Encodes the single character c (with look-ahead nextc) into mbdst using
 * an empty extra list and no output size limit.  Returns a pointer just
 * past the last byte written, or NULL on failure.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int written;

	written = istrsenvisx(mbdst, NULL, pair, 1, flags, "", NULL);
	return written < 0 ? NULL : mbdst + written;
}
6298ccca122SBrooks Davis 
/*
 * nvis - like vis(), with the output limited to a buffer of dlen bytes.
 * Returns a pointer just past the last byte written, or NULL on failure.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int written;

	written = istrsenvisx(mbdst, &dlen, pair, 1, flags, "", NULL);
	return written < 0 ? NULL : mbdst + written;
}
6448ccca122SBrooks Davis 
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 */

int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	int rv;

	rv = istrsenvisxl(mbdst, NULL, mbsrc, flags, "", NULL);
	return rv;
}
658778c12a6SBrooks Davis 
/*
 * strnvis - like strvis(), with the output limited to dlen bytes.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	int rv;

	rv = istrsenvisxl(mbdst, &dlen, mbsrc, flags, "", NULL);
	return rv;
}
664778c12a6SBrooks Davis 
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	int rv;

	rv = istrsenvisx(mbdst, NULL, mbsrc, len, flags, "", NULL);
	return rv;
}
6818ccca122SBrooks Davis 
/*
 * strnvisx - like strvisx(), with the output limited to dlen bytes.
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	int rv;

	rv = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", NULL);
	return rv;
}
6878ccca122SBrooks Davis 
/*
 * strenvisx - like strnvisx(), additionally passing the multibyte
 * conversion error flag in and out through cerr_ptr.
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	int rv;

	rv = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr);
	return rv;
}
6948ccca122SBrooks Davis #endif
695