1*ac25e238SBrooks Davis /* $NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $ */ 28ccca122SBrooks Davis 38ccca122SBrooks Davis /*- 48ccca122SBrooks Davis * Copyright (c) 1989, 1993 58ccca122SBrooks Davis * The Regents of the University of California. All rights reserved. 68ccca122SBrooks Davis * 78ccca122SBrooks Davis * Redistribution and use in source and binary forms, with or without 88ccca122SBrooks Davis * modification, are permitted provided that the following conditions 98ccca122SBrooks Davis * are met: 108ccca122SBrooks Davis * 1. Redistributions of source code must retain the above copyright 118ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer. 128ccca122SBrooks Davis * 2. Redistributions in binary form must reproduce the above copyright 138ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer in the 148ccca122SBrooks Davis * documentation and/or other materials provided with the distribution. 158ccca122SBrooks Davis * 3. Neither the name of the University nor the names of its contributors 168ccca122SBrooks Davis * may be used to endorse or promote products derived from this software 178ccca122SBrooks Davis * without specific prior written permission. 188ccca122SBrooks Davis * 198ccca122SBrooks Davis * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 208ccca122SBrooks Davis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 218ccca122SBrooks Davis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 228ccca122SBrooks Davis * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 238ccca122SBrooks Davis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 248ccca122SBrooks Davis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 258ccca122SBrooks Davis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 268ccca122SBrooks Davis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 278ccca122SBrooks Davis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 288ccca122SBrooks Davis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 298ccca122SBrooks Davis * SUCH DAMAGE. 308ccca122SBrooks Davis */ 318ccca122SBrooks Davis 328ccca122SBrooks Davis /*- 338ccca122SBrooks Davis * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc. 348ccca122SBrooks Davis * All rights reserved. 358ccca122SBrooks Davis * 368ccca122SBrooks Davis * Redistribution and use in source and binary forms, with or without 378ccca122SBrooks Davis * modification, are permitted provided that the following conditions 388ccca122SBrooks Davis * are met: 398ccca122SBrooks Davis * 1. Redistributions of source code must retain the above copyright 408ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer. 418ccca122SBrooks Davis * 2. Redistributions in binary form must reproduce the above copyright 428ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer in the 438ccca122SBrooks Davis * documentation and/or other materials provided with the distribution. 448ccca122SBrooks Davis * 458ccca122SBrooks Davis * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 468ccca122SBrooks Davis * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 478ccca122SBrooks Davis * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 488ccca122SBrooks Davis * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 498ccca122SBrooks Davis * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 508ccca122SBrooks Davis * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 518ccca122SBrooks Davis * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 528ccca122SBrooks Davis * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 538ccca122SBrooks Davis * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 548ccca122SBrooks Davis * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 558ccca122SBrooks Davis * POSSIBILITY OF SUCH DAMAGE. 568ccca122SBrooks Davis */ 578ccca122SBrooks Davis 588ccca122SBrooks Davis #include <sys/cdefs.h> 598ccca122SBrooks Davis #if defined(LIBC_SCCS) && !defined(lint) 60*ac25e238SBrooks Davis __RCSID("$NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $"); 618ccca122SBrooks Davis #endif /* LIBC_SCCS and not lint */ 62778c12a6SBrooks Davis #ifdef __FBSDID 638ccca122SBrooks Davis __FBSDID("$FreeBSD$"); 64778c12a6SBrooks Davis #define _DIAGASSERT(x) assert(x) 65778c12a6SBrooks Davis #endif 668ccca122SBrooks Davis 678ccca122SBrooks Davis #include "namespace.h" 688ccca122SBrooks Davis #include <sys/types.h> 69778c12a6SBrooks Davis #include <sys/param.h> 708ccca122SBrooks Davis 718ccca122SBrooks Davis #include <assert.h> 728ccca122SBrooks Davis #include <vis.h> 738ccca122SBrooks Davis #include <errno.h> 748ccca122SBrooks Davis #include <stdlib.h> 75778c12a6SBrooks Davis #include <wchar.h> 76778c12a6SBrooks Davis #include <wctype.h> 778ccca122SBrooks Davis 788ccca122SBrooks Davis #ifdef __weak_alias 798ccca122SBrooks Davis __weak_alias(strvisx,_strvisx) 808ccca122SBrooks Davis #endif 818ccca122SBrooks Davis 828ccca122SBrooks Davis #if !HAVE_VIS || !HAVE_SVIS 838ccca122SBrooks Davis #include <ctype.h> 848ccca122SBrooks Davis #include <limits.h> 858ccca122SBrooks Davis #include <stdio.h> 868ccca122SBrooks Davis #include <string.h> 878ccca122SBrooks Davis 88778c12a6SBrooks Davis /* 89778c12a6SBrooks Davis * The reason for going through the trouble to deal with character encodings 90778c12a6SBrooks Davis * in vis(3), is that we use this to safe encode output of commands. This 91778c12a6SBrooks Davis * safe encoding varies depending on the character set. For example if we 92778c12a6SBrooks Davis * display ps output in French, we don't want to display French characters 93778c12a6SBrooks Davis * as M-foo. 94778c12a6SBrooks Davis */ 95778c12a6SBrooks Davis 96778c12a6SBrooks Davis static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *); 978ccca122SBrooks Davis 988ccca122SBrooks Davis #undef BELL 99778c12a6SBrooks Davis #define BELL L'\a' 1008ccca122SBrooks Davis 101778c12a6SBrooks Davis #define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7') 102778c12a6SBrooks Davis #define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n') 103778c12a6SBrooks Davis #define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r') 104778c12a6SBrooks Davis #define xtoa(c) L"0123456789abcdef"[c] 105778c12a6SBrooks Davis #define XTOA(c) L"0123456789ABCDEF"[c] 1068ccca122SBrooks Davis 107778c12a6SBrooks Davis #define MAXEXTRAS 10 1088ccca122SBrooks Davis 109778c12a6SBrooks Davis #if !HAVE_NBTOOL_CONFIG_H 110778c12a6SBrooks Davis #ifndef __NetBSD__ 111778c12a6SBrooks Davis /* 112778c12a6SBrooks Davis * On NetBSD MB_LEN_MAX is currently 32 which does not fit on any integer 113778c12a6SBrooks Davis * integral type and it is probably wrong, since currently the maximum 114778c12a6SBrooks Davis * number of bytes and character needs is 6. Until this is fixed, the 115778c12a6SBrooks Davis * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and 116778c12a6SBrooks Davis * the assertion is commented out. 117778c12a6SBrooks Davis */ 118778c12a6SBrooks Davis #ifdef __FreeBSD__ 119778c12a6SBrooks Davis /* 120778c12a6SBrooks Davis * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel 121778c12a6SBrooks Davis * mode. 122778c12a6SBrooks Davis */ 123778c12a6SBrooks Davis #ifndef CTASSERT 124778c12a6SBrooks Davis #define CTASSERT(x) _CTASSERT(x, __LINE__) 125778c12a6SBrooks Davis #define _CTASSERT(x, y) __CTASSERT(x, y) 126778c12a6SBrooks Davis #define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] 127778c12a6SBrooks Davis #endif 128778c12a6SBrooks Davis #endif /* __FreeBSD__ */ 129778c12a6SBrooks Davis CTASSERT(MB_LEN_MAX <= sizeof(uint64_t)); 130778c12a6SBrooks Davis #endif /* !__NetBSD__ */ 131778c12a6SBrooks Davis #endif 1328ccca122SBrooks Davis 1338ccca122SBrooks Davis /* 1348ccca122SBrooks Davis * This is do_hvis, for HTTP style (RFC 1808) 1358ccca122SBrooks Davis */ 136778c12a6SBrooks Davis static wchar_t * 137778c12a6SBrooks Davis do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 1388ccca122SBrooks Davis { 139778c12a6SBrooks Davis if (iswalnum(c) 1408ccca122SBrooks Davis /* safe */ 141778c12a6SBrooks Davis || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+' 1428ccca122SBrooks Davis /* extra */ 143778c12a6SBrooks Davis || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')' 144778c12a6SBrooks Davis || c == L',') 145778c12a6SBrooks Davis dst = do_svis(dst, c, flags, nextc, extra); 146778c12a6SBrooks Davis else { 147778c12a6SBrooks Davis *dst++ = L'%'; 1488ccca122SBrooks Davis *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); 1498ccca122SBrooks Davis *dst++ = xtoa((unsigned int)c & 0xf); 1508ccca122SBrooks Davis } 1518ccca122SBrooks Davis 1528ccca122SBrooks Davis return dst; 1538ccca122SBrooks Davis } 1548ccca122SBrooks Davis 1558ccca122SBrooks Davis /* 1568ccca122SBrooks Davis * This is do_mvis, for Quoted-Printable MIME (RFC 2045) 1578ccca122SBrooks Davis * NB: No handling of long lines or CRLF. 1588ccca122SBrooks Davis */ 159778c12a6SBrooks Davis static wchar_t * 160778c12a6SBrooks Davis do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 1618ccca122SBrooks Davis { 162778c12a6SBrooks Davis if ((c != L'\n') && 1638ccca122SBrooks Davis /* Space at the end of the line */ 164778c12a6SBrooks Davis ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) || 1658ccca122SBrooks Davis /* Out of range */ 166778c12a6SBrooks Davis (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || 1678ccca122SBrooks Davis /* Specific char to be escaped */ 168778c12a6SBrooks Davis wcschr(L"#$@[\\]^`{|}~", c) != NULL)) { 169778c12a6SBrooks Davis *dst++ = L'='; 1708ccca122SBrooks Davis *dst++ = XTOA(((unsigned int)c >> 4) & 0xf); 1718ccca122SBrooks Davis *dst++ = XTOA((unsigned int)c & 0xf); 172778c12a6SBrooks Davis } else 173778c12a6SBrooks Davis dst = do_svis(dst, c, flags, nextc, extra); 174778c12a6SBrooks Davis return dst; 1758ccca122SBrooks Davis } 176778c12a6SBrooks Davis 177778c12a6SBrooks Davis /* 178778c12a6SBrooks Davis * Output single byte of multibyte character. 179778c12a6SBrooks Davis */ 180778c12a6SBrooks Davis static wchar_t * 181778c12a6SBrooks Davis do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra) 182778c12a6SBrooks Davis { 183778c12a6SBrooks Davis if (flags & VIS_CSTYLE) { 184778c12a6SBrooks Davis switch (c) { 185778c12a6SBrooks Davis case L'\n': 186778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'n'; 187778c12a6SBrooks Davis return dst; 188778c12a6SBrooks Davis case L'\r': 189778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'r'; 190778c12a6SBrooks Davis return dst; 191778c12a6SBrooks Davis case L'\b': 192778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'b'; 193778c12a6SBrooks Davis return dst; 194778c12a6SBrooks Davis case BELL: 195778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'a'; 196778c12a6SBrooks Davis return dst; 197778c12a6SBrooks Davis case L'\v': 198778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'v'; 199778c12a6SBrooks Davis return dst; 200778c12a6SBrooks Davis case L'\t': 201778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L't'; 202778c12a6SBrooks Davis return dst; 203778c12a6SBrooks Davis case L'\f': 204778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'f'; 205778c12a6SBrooks Davis return dst; 206778c12a6SBrooks Davis case L' ': 207778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L's'; 208778c12a6SBrooks Davis return dst; 209778c12a6SBrooks Davis case L'\0': 210778c12a6SBrooks Davis *dst++ = L'\\'; *dst++ = L'0'; 211778c12a6SBrooks Davis if (iswoctal(nextc)) { 212778c12a6SBrooks Davis *dst++ = L'0'; 213778c12a6SBrooks Davis *dst++ = L'0'; 214778c12a6SBrooks Davis } 215778c12a6SBrooks Davis return dst; 216778c12a6SBrooks Davis default: 217778c12a6SBrooks Davis if (iswgraph(c)) { 218778c12a6SBrooks Davis *dst++ = L'\\'; 219778c12a6SBrooks Davis *dst++ = c; 220778c12a6SBrooks Davis return dst; 221778c12a6SBrooks Davis } 222778c12a6SBrooks Davis } 223778c12a6SBrooks Davis } 224778c12a6SBrooks Davis if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) { 225778c12a6SBrooks Davis *dst++ = L'\\'; 226778c12a6SBrooks Davis *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0'; 227778c12a6SBrooks Davis *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0'; 228778c12a6SBrooks Davis *dst++ = (c & 07) + L'0'; 229778c12a6SBrooks Davis } else { 230778c12a6SBrooks Davis if ((flags & VIS_NOSLASH) == 0) 231778c12a6SBrooks Davis *dst++ = L'\\'; 232778c12a6SBrooks Davis 233778c12a6SBrooks Davis if (c & 0200) { 234778c12a6SBrooks Davis c &= 0177; 235778c12a6SBrooks Davis *dst++ = L'M'; 236778c12a6SBrooks Davis } 237778c12a6SBrooks Davis 238778c12a6SBrooks Davis if (iswcntrl(c)) { 239778c12a6SBrooks Davis *dst++ = L'^'; 240778c12a6SBrooks Davis if (c == 0177) 241778c12a6SBrooks Davis *dst++ = L'?'; 242778c12a6SBrooks Davis else 243778c12a6SBrooks Davis *dst++ = c + L'@'; 244778c12a6SBrooks Davis } else { 245778c12a6SBrooks Davis *dst++ = L'-'; 246778c12a6SBrooks Davis *dst++ = c; 247778c12a6SBrooks Davis } 248778c12a6SBrooks Davis } 249778c12a6SBrooks Davis 2508ccca122SBrooks Davis return dst; 2518ccca122SBrooks Davis } 2528ccca122SBrooks Davis 2538ccca122SBrooks Davis /* 2548ccca122SBrooks Davis * This is do_vis, the central code of vis. 2558ccca122SBrooks Davis * dst: Pointer to the destination buffer 2568ccca122SBrooks Davis * c: Character to encode 257778c12a6SBrooks Davis * flags: Flags word 2588ccca122SBrooks Davis * nextc: The character following 'c' 2598ccca122SBrooks Davis * extra: Pointer to the list of extra characters to be 2608ccca122SBrooks Davis * backslash-protected. 2618ccca122SBrooks Davis */ 262778c12a6SBrooks Davis static wchar_t * 263778c12a6SBrooks Davis do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 2648ccca122SBrooks Davis { 265778c12a6SBrooks Davis int iswextra, i, shft; 266778c12a6SBrooks Davis uint64_t bmsk, wmsk; 2678ccca122SBrooks Davis 268778c12a6SBrooks Davis iswextra = wcschr(extra, c) != NULL; 269778c12a6SBrooks Davis if (!iswextra && (iswgraph(c) || iswwhite(c) || 270778c12a6SBrooks Davis ((flags & VIS_SAFE) && iswsafe(c)))) { 2718ccca122SBrooks Davis *dst++ = c; 2728ccca122SBrooks Davis return dst; 2738ccca122SBrooks Davis } 274778c12a6SBrooks Davis 275778c12a6SBrooks Davis /* See comment in istrsenvisx() output loop, below. */ 276778c12a6SBrooks Davis wmsk = 0; 277778c12a6SBrooks Davis for (i = sizeof(wmsk) - 1; i >= 0; i--) { 278778c12a6SBrooks Davis shft = i * NBBY; 279778c12a6SBrooks Davis bmsk = (uint64_t)0xffLL << shft; 280778c12a6SBrooks Davis wmsk |= bmsk; 281778c12a6SBrooks Davis if ((c & wmsk) || i == 0) 282778c12a6SBrooks Davis dst = do_mbyte(dst, (wint_t)( 283778c12a6SBrooks Davis (uint64_t)(c & bmsk) >> shft), 284778c12a6SBrooks Davis flags, nextc, iswextra); 2858ccca122SBrooks Davis } 2868ccca122SBrooks Davis 2878ccca122SBrooks Davis return dst; 2888ccca122SBrooks Davis } 2898ccca122SBrooks Davis 290778c12a6SBrooks Davis typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *); 2918ccca122SBrooks Davis 2928ccca122SBrooks Davis /* 2938ccca122SBrooks Davis * Return the appropriate encoding function depending on the flags given. 2948ccca122SBrooks Davis */ 2958ccca122SBrooks Davis static visfun_t 296778c12a6SBrooks Davis getvisfun(int flags) 2978ccca122SBrooks Davis { 298778c12a6SBrooks Davis if (flags & VIS_HTTPSTYLE) 2998ccca122SBrooks Davis return do_hvis; 300778c12a6SBrooks Davis if (flags & VIS_MIMESTYLE) 3018ccca122SBrooks Davis return do_mvis; 3028ccca122SBrooks Davis return do_svis; 3038ccca122SBrooks Davis } 3048ccca122SBrooks Davis 3058ccca122SBrooks Davis /* 306778c12a6SBrooks Davis * Expand list of extra characters to not visually encode. 3078ccca122SBrooks Davis */ 308778c12a6SBrooks Davis static wchar_t * 309778c12a6SBrooks Davis makeextralist(int flags, const char *src) 3108ccca122SBrooks Davis { 311778c12a6SBrooks Davis wchar_t *dst, *d; 312778c12a6SBrooks Davis size_t len; 3138ccca122SBrooks Davis 314778c12a6SBrooks Davis len = strlen(src); 315778c12a6SBrooks Davis if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL) 3168ccca122SBrooks Davis return NULL; 317778c12a6SBrooks Davis 318778c12a6SBrooks Davis if (mbstowcs(dst, src, len) == (size_t)-1) { 319778c12a6SBrooks Davis size_t i; 320778c12a6SBrooks Davis for (i = 0; i < len; i++) 321778c12a6SBrooks Davis dst[i] = (wint_t)(u_char)src[i]; 322778c12a6SBrooks Davis d = dst + len; 323778c12a6SBrooks Davis } else 324778c12a6SBrooks Davis d = dst + wcslen(dst); 325778c12a6SBrooks Davis 326778c12a6SBrooks Davis if (flags & VIS_GLOB) { 327778c12a6SBrooks Davis *d++ = L'*'; 328778c12a6SBrooks Davis *d++ = L'?'; 329778c12a6SBrooks Davis *d++ = L'['; 330778c12a6SBrooks Davis *d++ = L'#'; 3318ccca122SBrooks Davis } 332778c12a6SBrooks Davis 333778c12a6SBrooks Davis if (flags & VIS_SP) *d++ = L' '; 334778c12a6SBrooks Davis if (flags & VIS_TAB) *d++ = L'\t'; 335778c12a6SBrooks Davis if (flags & VIS_NL) *d++ = L'\n'; 336778c12a6SBrooks Davis if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\'; 337778c12a6SBrooks Davis *d = L'\0'; 338778c12a6SBrooks Davis 3398ccca122SBrooks Davis return dst; 3408ccca122SBrooks Davis } 3418ccca122SBrooks Davis 3428ccca122SBrooks Davis /* 343778c12a6SBrooks Davis * istrsenvisx() 344778c12a6SBrooks Davis * The main internal function. 345778c12a6SBrooks Davis * All user-visible functions call this one. 3468ccca122SBrooks Davis */ 3478ccca122SBrooks Davis static int 348778c12a6SBrooks Davis istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength, 349778c12a6SBrooks Davis int flags, const char *mbextra, int *cerr_ptr) 3508ccca122SBrooks Davis { 351778c12a6SBrooks Davis wchar_t *dst, *src, *pdst, *psrc, *start, *extra; 352778c12a6SBrooks Davis size_t len, olen; 353778c12a6SBrooks Davis uint64_t bmsk, wmsk; 354778c12a6SBrooks Davis wint_t c; 3558ccca122SBrooks Davis visfun_t f; 356778c12a6SBrooks Davis int clen = 0, cerr = 0, error = -1, i, shft; 357778c12a6SBrooks Davis ssize_t mbslength, maxolen; 3588ccca122SBrooks Davis 359778c12a6SBrooks Davis _DIAGASSERT(mbdst != NULL); 360*ac25e238SBrooks Davis _DIAGASSERT(mbsrc != NULL || mblength == 0); 361778c12a6SBrooks Davis _DIAGASSERT(mbextra != NULL); 362778c12a6SBrooks Davis 363778c12a6SBrooks Davis /* 364778c12a6SBrooks Davis * Input (mbsrc) is a char string considered to be multibyte 365778c12a6SBrooks Davis * characters. The input loop will read this string pulling 366778c12a6SBrooks Davis * one character, possibly multiple bytes, from mbsrc and 367778c12a6SBrooks Davis * converting each to wchar_t in src. 368778c12a6SBrooks Davis * 369778c12a6SBrooks Davis * The vis conversion will be done using the wide char 370778c12a6SBrooks Davis * wchar_t string. 371778c12a6SBrooks Davis * 372778c12a6SBrooks Davis * This will then be converted back to a multibyte string to 373778c12a6SBrooks Davis * return to the caller. 374778c12a6SBrooks Davis */ 375778c12a6SBrooks Davis 376778c12a6SBrooks Davis /* Allocate space for the wide char strings */ 377778c12a6SBrooks Davis psrc = pdst = extra = NULL; 378778c12a6SBrooks Davis if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL) 3798ccca122SBrooks Davis return -1; 380778c12a6SBrooks Davis if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL) 381778c12a6SBrooks Davis goto out; 382778c12a6SBrooks Davis dst = pdst; 383778c12a6SBrooks Davis src = psrc; 384778c12a6SBrooks Davis 385778c12a6SBrooks Davis /* Use caller's multibyte conversion error flag. */ 386778c12a6SBrooks Davis if (cerr_ptr) 387778c12a6SBrooks Davis cerr = *cerr_ptr; 388778c12a6SBrooks Davis 389778c12a6SBrooks Davis /* 390778c12a6SBrooks Davis * Input loop. 391778c12a6SBrooks Davis * Handle up to mblength characters (not bytes). We do not 392778c12a6SBrooks Davis * stop at NULs because we may be processing a block of data 393778c12a6SBrooks Davis * that includes NULs. 394778c12a6SBrooks Davis */ 395778c12a6SBrooks Davis mbslength = (ssize_t)mblength; 396778c12a6SBrooks Davis /* 397778c12a6SBrooks Davis * When inputing a single character, must also read in the 398778c12a6SBrooks Davis * next character for nextc, the look-ahead character. 399778c12a6SBrooks Davis */ 400778c12a6SBrooks Davis if (mbslength == 1) 401778c12a6SBrooks Davis mbslength++; 402778c12a6SBrooks Davis while (mbslength > 0) { 403778c12a6SBrooks Davis /* Convert one multibyte character to wchar_t. */ 404778c12a6SBrooks Davis if (!cerr) 405778c12a6SBrooks Davis clen = mbtowc(src, mbsrc, MB_LEN_MAX); 406778c12a6SBrooks Davis if (cerr || clen < 0) { 407778c12a6SBrooks Davis /* Conversion error, process as a byte instead. */ 408778c12a6SBrooks Davis *src = (wint_t)(u_char)*mbsrc; 409778c12a6SBrooks Davis clen = 1; 410778c12a6SBrooks Davis cerr = 1; 4118ccca122SBrooks Davis } 412778c12a6SBrooks Davis if (clen == 0) 413778c12a6SBrooks Davis /* 414778c12a6SBrooks Davis * NUL in input gives 0 return value. process 415778c12a6SBrooks Davis * as single NUL byte and keep going. 416778c12a6SBrooks Davis */ 417778c12a6SBrooks Davis clen = 1; 418778c12a6SBrooks Davis /* Advance buffer character pointer. */ 419778c12a6SBrooks Davis src++; 420778c12a6SBrooks Davis /* Advance input pointer by number of bytes read. */ 421778c12a6SBrooks Davis mbsrc += clen; 422778c12a6SBrooks Davis /* Decrement input byte count. */ 423778c12a6SBrooks Davis mbslength -= clen; 4248ccca122SBrooks Davis } 425778c12a6SBrooks Davis len = src - psrc; 426778c12a6SBrooks Davis src = psrc; 427778c12a6SBrooks Davis /* 428778c12a6SBrooks Davis * In the single character input case, we will have actually 429778c12a6SBrooks Davis * processed two characters, c and nextc. Reset len back to 430778c12a6SBrooks Davis * just a single character. 431778c12a6SBrooks Davis */ 432778c12a6SBrooks Davis if (mblength < len) 433778c12a6SBrooks Davis len = mblength; 434778c12a6SBrooks Davis 435778c12a6SBrooks Davis /* Convert extra argument to list of characters for this mode. */ 436778c12a6SBrooks Davis extra = makeextralist(flags, mbextra); 437778c12a6SBrooks Davis if (!extra) { 4388ccca122SBrooks Davis if (dlen && *dlen == 0) { 4398ccca122SBrooks Davis errno = ENOSPC; 440778c12a6SBrooks Davis goto out; 4418ccca122SBrooks Davis } 442778c12a6SBrooks Davis *mbdst = '\0'; /* can't create extra, return "" */ 443778c12a6SBrooks Davis error = 0; 444778c12a6SBrooks Davis goto out; 4458ccca122SBrooks Davis } 4468ccca122SBrooks Davis 447778c12a6SBrooks Davis /* Look up which processing function to call. */ 448778c12a6SBrooks Davis f = getvisfun(flags); 4498ccca122SBrooks Davis 450778c12a6SBrooks Davis /* 451778c12a6SBrooks Davis * Main processing loop. 452778c12a6SBrooks Davis * Call do_Xvis processing function one character at a time 453778c12a6SBrooks Davis * with next character available for look-ahead. 454778c12a6SBrooks Davis */ 4558ccca122SBrooks Davis for (start = dst; len > 0; len--) { 4568ccca122SBrooks Davis c = *src++; 457778c12a6SBrooks Davis dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra); 4588ccca122SBrooks Davis if (dst == NULL) { 4598ccca122SBrooks Davis errno = ENOSPC; 460778c12a6SBrooks Davis goto out; 4618ccca122SBrooks Davis } 4628ccca122SBrooks Davis } 463778c12a6SBrooks Davis 464778c12a6SBrooks Davis /* Terminate the string in the buffer. */ 465778c12a6SBrooks Davis *dst = L'\0'; 466778c12a6SBrooks Davis 467778c12a6SBrooks Davis /* 468778c12a6SBrooks Davis * Output loop. 469778c12a6SBrooks Davis * Convert wchar_t string back to multibyte output string. 470778c12a6SBrooks Davis * If we have hit a multi-byte conversion error on input, 471778c12a6SBrooks Davis * output byte-by-byte here. Else use wctomb(). 472778c12a6SBrooks Davis */ 473778c12a6SBrooks Davis len = wcslen(start); 474778c12a6SBrooks Davis maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1); 475778c12a6SBrooks Davis olen = 0; 476778c12a6SBrooks Davis for (dst = start; len > 0; len--) { 477778c12a6SBrooks Davis if (!cerr) 478778c12a6SBrooks Davis clen = wctomb(mbdst, *dst); 479778c12a6SBrooks Davis if (cerr || clen < 0) { 480778c12a6SBrooks Davis /* 481778c12a6SBrooks Davis * Conversion error, process as a byte(s) instead. 482778c12a6SBrooks Davis * Examine each byte and higher-order bytes for 483778c12a6SBrooks Davis * data. E.g., 484778c12a6SBrooks Davis * 0x000000000000a264 -> a2 64 485778c12a6SBrooks Davis * 0x000000001f00a264 -> 1f 00 a2 64 486778c12a6SBrooks Davis */ 487778c12a6SBrooks Davis clen = 0; 488778c12a6SBrooks Davis wmsk = 0; 489778c12a6SBrooks Davis for (i = sizeof(wmsk) - 1; i >= 0; i--) { 490778c12a6SBrooks Davis shft = i * NBBY; 491778c12a6SBrooks Davis bmsk = (uint64_t)0xffLL << shft; 492778c12a6SBrooks Davis wmsk |= bmsk; 493778c12a6SBrooks Davis if ((*dst & wmsk) || i == 0) 494778c12a6SBrooks Davis mbdst[clen++] = (char)( 495778c12a6SBrooks Davis (uint64_t)(*dst & bmsk) >> 496778c12a6SBrooks Davis shft); 4978ccca122SBrooks Davis } 498778c12a6SBrooks Davis cerr = 1; 499778c12a6SBrooks Davis } 500778c12a6SBrooks Davis /* If this character would exceed our output limit, stop. */ 501778c12a6SBrooks Davis if (olen + clen > (size_t)maxolen) 502778c12a6SBrooks Davis break; 503778c12a6SBrooks Davis /* Advance output pointer by number of bytes written. */ 504778c12a6SBrooks Davis mbdst += clen; 505778c12a6SBrooks Davis /* Advance buffer character pointer. */ 506778c12a6SBrooks Davis dst++; 507778c12a6SBrooks Davis /* Incrment output character count. */ 508778c12a6SBrooks Davis olen += clen; 509778c12a6SBrooks Davis } 510778c12a6SBrooks Davis 511778c12a6SBrooks Davis /* Terminate the output string. */ 512778c12a6SBrooks Davis *mbdst = '\0'; 513778c12a6SBrooks Davis 514778c12a6SBrooks Davis /* Pass conversion error flag out. */ 515778c12a6SBrooks Davis if (cerr_ptr) 516778c12a6SBrooks Davis *cerr_ptr = cerr; 517778c12a6SBrooks Davis 518778c12a6SBrooks Davis free(extra); 519778c12a6SBrooks Davis free(pdst); 520778c12a6SBrooks Davis free(psrc); 521778c12a6SBrooks Davis 522778c12a6SBrooks Davis return (int)olen; 523778c12a6SBrooks Davis out: 524778c12a6SBrooks Davis free(extra); 525778c12a6SBrooks Davis free(pdst); 526778c12a6SBrooks Davis free(psrc); 527778c12a6SBrooks Davis return error; 528778c12a6SBrooks Davis } 529*ac25e238SBrooks Davis 530*ac25e238SBrooks Davis static int 531*ac25e238SBrooks Davis istrsenvisxl(char *mbdst, size_t *dlen, const char *mbsrc, 532*ac25e238SBrooks Davis int flags, const char *mbextra, int *cerr_ptr) 533*ac25e238SBrooks Davis { 534*ac25e238SBrooks Davis return istrsenvisx(mbdst, dlen, mbsrc, 535*ac25e238SBrooks Davis mbsrc != NULL ? strlen(mbsrc) : 0, flags, mbextra, cerr_ptr); 536*ac25e238SBrooks Davis } 537*ac25e238SBrooks Davis 538778c12a6SBrooks Davis #endif 539778c12a6SBrooks Davis 540778c12a6SBrooks Davis #if !HAVE_SVIS 541778c12a6SBrooks Davis /* 542778c12a6SBrooks Davis * The "svis" variants all take an "extra" arg that is a pointer 543778c12a6SBrooks Davis * to a NUL-terminated list of characters to be encoded, too. 544778c12a6SBrooks Davis * These functions are useful e. g. to encode strings in such a 545778c12a6SBrooks Davis * way so that they are not interpreted by a shell. 546778c12a6SBrooks Davis */ 547778c12a6SBrooks Davis 548778c12a6SBrooks Davis char * 549778c12a6SBrooks Davis svis(char *mbdst, int c, int flags, int nextc, const char *mbextra) 550778c12a6SBrooks Davis { 551778c12a6SBrooks Davis char cc[2]; 552778c12a6SBrooks Davis int ret; 553778c12a6SBrooks Davis 554778c12a6SBrooks Davis cc[0] = c; 555778c12a6SBrooks Davis cc[1] = nextc; 556778c12a6SBrooks Davis 557778c12a6SBrooks Davis ret = istrsenvisx(mbdst, NULL, cc, 1, flags, mbextra, NULL); 558778c12a6SBrooks Davis if (ret < 0) 559778c12a6SBrooks Davis return NULL; 560778c12a6SBrooks Davis return mbdst + ret; 561778c12a6SBrooks Davis } 562778c12a6SBrooks Davis 563778c12a6SBrooks Davis char * 564778c12a6SBrooks Davis snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra) 565778c12a6SBrooks Davis { 566778c12a6SBrooks Davis char cc[2]; 567778c12a6SBrooks Davis int ret; 568778c12a6SBrooks Davis 569778c12a6SBrooks Davis cc[0] = c; 570778c12a6SBrooks Davis cc[1] = nextc; 571778c12a6SBrooks Davis 572778c12a6SBrooks Davis ret = istrsenvisx(mbdst, &dlen, cc, 1, flags, mbextra, NULL); 573778c12a6SBrooks Davis if (ret < 0) 574778c12a6SBrooks Davis return NULL; 575778c12a6SBrooks Davis return mbdst + ret; 5768ccca122SBrooks Davis } 5778ccca122SBrooks Davis 5788ccca122SBrooks Davis int 579778c12a6SBrooks Davis strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra) 5808ccca122SBrooks Davis { 581*ac25e238SBrooks Davis return istrsenvisxl(mbdst, NULL, mbsrc, flags, mbextra, NULL); 5828ccca122SBrooks Davis } 5838ccca122SBrooks Davis 5848ccca122SBrooks Davis int 585778c12a6SBrooks Davis strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra) 5868ccca122SBrooks Davis { 587*ac25e238SBrooks Davis return istrsenvisxl(mbdst, &dlen, mbsrc, flags, mbextra, NULL); 588778c12a6SBrooks Davis } 589778c12a6SBrooks Davis 590778c12a6SBrooks Davis int 591778c12a6SBrooks Davis strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra) 592778c12a6SBrooks Davis { 593778c12a6SBrooks Davis return istrsenvisx(mbdst, NULL, mbsrc, len, flags, mbextra, NULL); 594778c12a6SBrooks Davis } 595778c12a6SBrooks Davis 596778c12a6SBrooks Davis int 597778c12a6SBrooks Davis strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 598778c12a6SBrooks Davis const char *mbextra) 599778c12a6SBrooks Davis { 600778c12a6SBrooks Davis return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, NULL); 601778c12a6SBrooks Davis } 602778c12a6SBrooks Davis 603778c12a6SBrooks Davis int 604778c12a6SBrooks Davis strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 605778c12a6SBrooks Davis const char *mbextra, int *cerr_ptr) 606778c12a6SBrooks Davis { 607778c12a6SBrooks Davis return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, cerr_ptr); 6088ccca122SBrooks Davis } 6098ccca122SBrooks Davis #endif 6108ccca122SBrooks Davis 6118ccca122SBrooks Davis #if !HAVE_VIS 6128ccca122SBrooks Davis /* 6138ccca122SBrooks Davis * vis - visually encode characters 6148ccca122SBrooks Davis */ 615778c12a6SBrooks Davis char * 616778c12a6SBrooks Davis vis(char *mbdst, int c, int flags, int nextc) 6178ccca122SBrooks Davis { 618778c12a6SBrooks Davis char cc[2]; 619778c12a6SBrooks Davis int ret; 6208ccca122SBrooks Davis 621778c12a6SBrooks Davis cc[0] = c; 622778c12a6SBrooks Davis cc[1] = nextc; 6238ccca122SBrooks Davis 624778c12a6SBrooks Davis ret = istrsenvisx(mbdst, NULL, cc, 1, flags, "", NULL); 625778c12a6SBrooks Davis if (ret < 0) 6268ccca122SBrooks Davis return NULL; 627778c12a6SBrooks Davis return mbdst + ret; 6288ccca122SBrooks Davis } 6298ccca122SBrooks Davis 6308ccca122SBrooks Davis char * 631778c12a6SBrooks Davis nvis(char *mbdst, size_t dlen, int c, int flags, int nextc) 6328ccca122SBrooks Davis { 633778c12a6SBrooks Davis char cc[2]; 634778c12a6SBrooks Davis int ret; 6358ccca122SBrooks Davis 636778c12a6SBrooks Davis cc[0] = c; 637778c12a6SBrooks Davis cc[1] = nextc; 6388ccca122SBrooks Davis 639778c12a6SBrooks Davis ret = istrsenvisx(mbdst, &dlen, cc, 1, flags, "", NULL); 640778c12a6SBrooks Davis if (ret < 0) 641778c12a6SBrooks Davis return NULL; 642778c12a6SBrooks Davis return mbdst + ret; 643778c12a6SBrooks Davis } 6448ccca122SBrooks Davis 6458ccca122SBrooks Davis /* 646778c12a6SBrooks Davis * strvis - visually encode characters from src into dst 647778c12a6SBrooks Davis * 648778c12a6SBrooks Davis * Dst must be 4 times the size of src to account for possible 649778c12a6SBrooks Davis * expansion. The length of dst, not including the trailing NULL, 650778c12a6SBrooks Davis * is returned. 651778c12a6SBrooks Davis */ 652778c12a6SBrooks Davis 653778c12a6SBrooks Davis int 654778c12a6SBrooks Davis strvis(char *mbdst, const char *mbsrc, int flags) 655778c12a6SBrooks Davis { 656*ac25e238SBrooks Davis return istrsenvisxl(mbdst, NULL, mbsrc, flags, "", NULL); 657778c12a6SBrooks Davis } 658778c12a6SBrooks Davis 659778c12a6SBrooks Davis int 660778c12a6SBrooks Davis strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags) 661778c12a6SBrooks Davis { 662*ac25e238SBrooks Davis return istrsenvisxl(mbdst, &dlen, mbsrc, flags, "", NULL); 663778c12a6SBrooks Davis } 664778c12a6SBrooks Davis 665778c12a6SBrooks Davis /* 666778c12a6SBrooks Davis * strvisx - visually encode characters from src into dst 6678ccca122SBrooks Davis * 6688ccca122SBrooks Davis * Dst must be 4 times the size of src to account for possible 6698ccca122SBrooks Davis * expansion. The length of dst, not including the trailing NULL, 6708ccca122SBrooks Davis * is returned. 6718ccca122SBrooks Davis * 672778c12a6SBrooks Davis * Strvisx encodes exactly len characters from src into dst. 6738ccca122SBrooks Davis * This is useful for encoding a block of data. 6748ccca122SBrooks Davis */ 6758ccca122SBrooks Davis 676778c12a6SBrooks Davis int 677778c12a6SBrooks Davis strvisx(char *mbdst, const char *mbsrc, size_t len, int flags) 678778c12a6SBrooks Davis { 679778c12a6SBrooks Davis return istrsenvisx(mbdst, NULL, mbsrc, len, flags, "", NULL); 6808ccca122SBrooks Davis } 6818ccca122SBrooks Davis 6828ccca122SBrooks Davis int 683778c12a6SBrooks Davis strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags) 6848ccca122SBrooks Davis { 685778c12a6SBrooks Davis return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", NULL); 6868ccca122SBrooks Davis } 6878ccca122SBrooks Davis 6888ccca122SBrooks Davis int 689778c12a6SBrooks Davis strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 690778c12a6SBrooks Davis int *cerr_ptr) 6918ccca122SBrooks Davis { 692778c12a6SBrooks Davis return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr); 6938ccca122SBrooks Davis } 6948ccca122SBrooks Davis #endif 695