1*f1f89080SBrooks Davis /* $NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $ */ 2*f1f89080SBrooks Davis 3*f1f89080SBrooks Davis /*- 4*f1f89080SBrooks Davis * Copyright (c) 1989, 1993 5*f1f89080SBrooks Davis * The Regents of the University of California. All rights reserved. 6*f1f89080SBrooks Davis * 7*f1f89080SBrooks Davis * Redistribution and use in source and binary forms, with or without 8*f1f89080SBrooks Davis * modification, are permitted provided that the following conditions 9*f1f89080SBrooks Davis * are met: 10*f1f89080SBrooks Davis * 1. Redistributions of source code must retain the above copyright 11*f1f89080SBrooks Davis * notice, this list of conditions and the following disclaimer. 12*f1f89080SBrooks Davis * 2. Redistributions in binary form must reproduce the above copyright 13*f1f89080SBrooks Davis * notice, this list of conditions and the following disclaimer in the 14*f1f89080SBrooks Davis * documentation and/or other materials provided with the distribution. 15*f1f89080SBrooks Davis * 3. Neither the name of the University nor the names of its contributors 16*f1f89080SBrooks Davis * may be used to endorse or promote products derived from this software 17*f1f89080SBrooks Davis * without specific prior written permission. 18*f1f89080SBrooks Davis * 19*f1f89080SBrooks Davis * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20*f1f89080SBrooks Davis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21*f1f89080SBrooks Davis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22*f1f89080SBrooks Davis * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23*f1f89080SBrooks Davis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24*f1f89080SBrooks Davis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25*f1f89080SBrooks Davis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26*f1f89080SBrooks Davis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27*f1f89080SBrooks Davis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28*f1f89080SBrooks Davis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29*f1f89080SBrooks Davis * SUCH DAMAGE. 30*f1f89080SBrooks Davis */ 31*f1f89080SBrooks Davis 32*f1f89080SBrooks Davis #include <sys/cdefs.h> 33*f1f89080SBrooks Davis #ifndef lint 34*f1f89080SBrooks Davis __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 35*f1f89080SBrooks Davis The Regents of the University of California. All rights reserved."); 36*f1f89080SBrooks Davis #endif /* not lint */ 37*f1f89080SBrooks Davis 38*f1f89080SBrooks Davis #ifndef lint 39*f1f89080SBrooks Davis #if 0 40*f1f89080SBrooks Davis static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; 41*f1f89080SBrooks Davis #endif 42*f1f89080SBrooks Davis __RCSID("$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $"); 43*f1f89080SBrooks Davis #endif /* not lint */ 44*f1f89080SBrooks Davis 45*f1f89080SBrooks Davis #include <stdio.h> 46*f1f89080SBrooks Davis #include <string.h> 47*f1f89080SBrooks Davis #include <stdlib.h> 48*f1f89080SBrooks Davis #include <string.h> 49*f1f89080SBrooks Davis #include <errno.h> 50*f1f89080SBrooks Davis #include <wchar.h> 51*f1f89080SBrooks Davis #include <limits.h> 52*f1f89080SBrooks Davis #include <unistd.h> 53*f1f89080SBrooks Davis #include <err.h> 54*f1f89080SBrooks Davis #include <vis.h> 55*f1f89080SBrooks Davis 56*f1f89080SBrooks Davis #include "extern.h" 57*f1f89080SBrooks Davis 58*f1f89080SBrooks Davis static int eflags, fold, foldwidth = 80, none, markeol; 59*f1f89080SBrooks Davis #ifdef DEBUG 60*f1f89080SBrooks Davis int debug; 61*f1f89080SBrooks Davis #endif 62*f1f89080SBrooks Davis static const char *extra = ""; 63*f1f89080SBrooks Davis 64*f1f89080SBrooks Davis static void process(FILE *); 65*f1f89080SBrooks Davis 66*f1f89080SBrooks Davis int 67*f1f89080SBrooks Davis main(int argc, char *argv[]) 68*f1f89080SBrooks Davis { 69*f1f89080SBrooks Davis FILE *fp; 70*f1f89080SBrooks Davis int ch; 71*f1f89080SBrooks Davis int rval; 72*f1f89080SBrooks Davis 73*f1f89080SBrooks Davis while ((ch = getopt(argc, argv, "bcde:F:fhlmnostw")) != -1) 74*f1f89080SBrooks Davis switch((char)ch) { 75*f1f89080SBrooks Davis case 'b': 76*f1f89080SBrooks Davis eflags |= VIS_NOSLASH; 77*f1f89080SBrooks Davis break; 78*f1f89080SBrooks Davis case 'c': 79*f1f89080SBrooks Davis eflags |= VIS_CSTYLE; 80*f1f89080SBrooks Davis break; 81*f1f89080SBrooks Davis #ifdef DEBUG 82*f1f89080SBrooks Davis case 'd': 83*f1f89080SBrooks Davis debug++; 84*f1f89080SBrooks Davis break; 85*f1f89080SBrooks Davis #endif 86*f1f89080SBrooks Davis case 'e': 87*f1f89080SBrooks Davis extra = optarg; 88*f1f89080SBrooks Davis break; 89*f1f89080SBrooks Davis case 'F': 90*f1f89080SBrooks Davis if ((foldwidth = atoi(optarg)) < 5) { 91*f1f89080SBrooks Davis errx(1, "can't fold lines to less than 5 cols"); 92*f1f89080SBrooks Davis /* NOTREACHED */ 93*f1f89080SBrooks Davis } 94*f1f89080SBrooks Davis markeol++; 95*f1f89080SBrooks Davis break; 96*f1f89080SBrooks Davis case 'f': 97*f1f89080SBrooks Davis fold++; /* fold output lines to 80 cols */ 98*f1f89080SBrooks Davis break; /* using hidden newline */ 99*f1f89080SBrooks Davis case 'h': 100*f1f89080SBrooks Davis eflags |= VIS_HTTPSTYLE; 101*f1f89080SBrooks Davis break; 102*f1f89080SBrooks Davis case 'l': 103*f1f89080SBrooks Davis markeol++; /* mark end of line with \$ */ 104*f1f89080SBrooks Davis break; 105*f1f89080SBrooks Davis case 'm': 106*f1f89080SBrooks Davis eflags |= VIS_MIMESTYLE; 107*f1f89080SBrooks Davis if (foldwidth == 80) 108*f1f89080SBrooks Davis foldwidth = 76; 109*f1f89080SBrooks Davis break; 110*f1f89080SBrooks Davis case 'n': 111*f1f89080SBrooks Davis none++; 112*f1f89080SBrooks Davis break; 113*f1f89080SBrooks Davis case 'o': 114*f1f89080SBrooks Davis eflags |= VIS_OCTAL; 115*f1f89080SBrooks Davis break; 116*f1f89080SBrooks Davis case 's': 117*f1f89080SBrooks Davis eflags |= VIS_SAFE; 118*f1f89080SBrooks Davis break; 119*f1f89080SBrooks Davis case 't': 120*f1f89080SBrooks Davis eflags |= VIS_TAB; 121*f1f89080SBrooks Davis break; 122*f1f89080SBrooks Davis case 'w': 123*f1f89080SBrooks Davis eflags |= VIS_WHITE; 124*f1f89080SBrooks Davis break; 125*f1f89080SBrooks Davis case '?': 126*f1f89080SBrooks Davis default: 127*f1f89080SBrooks Davis (void)fprintf(stderr, 128*f1f89080SBrooks Davis "Usage: %s [-bcfhlmnostw] [-e extra]" 129*f1f89080SBrooks Davis " [-F foldwidth] [file ...]\n", getprogname()); 130*f1f89080SBrooks Davis return 1; 131*f1f89080SBrooks Davis } 132*f1f89080SBrooks Davis 133*f1f89080SBrooks Davis if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) == 134*f1f89080SBrooks Davis (VIS_HTTPSTYLE|VIS_MIMESTYLE)) 135*f1f89080SBrooks Davis errx(1, "Can't specify -m and -h at the same time"); 136*f1f89080SBrooks Davis 137*f1f89080SBrooks Davis argc -= optind; 138*f1f89080SBrooks Davis argv += optind; 139*f1f89080SBrooks Davis 140*f1f89080SBrooks Davis rval = 0; 141*f1f89080SBrooks Davis 142*f1f89080SBrooks Davis if (*argv) 143*f1f89080SBrooks Davis while (*argv) { 144*f1f89080SBrooks Davis if ((fp = fopen(*argv, "r")) != NULL) { 145*f1f89080SBrooks Davis process(fp); 146*f1f89080SBrooks Davis (void)fclose(fp); 147*f1f89080SBrooks Davis } else { 148*f1f89080SBrooks Davis warn("%s", *argv); 149*f1f89080SBrooks Davis rval = 1; 150*f1f89080SBrooks Davis } 151*f1f89080SBrooks Davis argv++; 152*f1f89080SBrooks Davis } 153*f1f89080SBrooks Davis else 154*f1f89080SBrooks Davis process(stdin); 155*f1f89080SBrooks Davis return rval; 156*f1f89080SBrooks Davis } 157*f1f89080SBrooks Davis 158*f1f89080SBrooks Davis static void 159*f1f89080SBrooks Davis process(FILE *fp) 160*f1f89080SBrooks Davis { 161*f1f89080SBrooks Davis static int col = 0; 162*f1f89080SBrooks Davis static char nul[] = "\0"; 163*f1f89080SBrooks Davis char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ 164*f1f89080SBrooks Davis wint_t c, c1, rachar; 165*f1f89080SBrooks Davis char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */ 166*f1f89080SBrooks Davis char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */ 167*f1f89080SBrooks Davis int mbilen, cerr = 0, raerr = 0; 168*f1f89080SBrooks Davis 169*f1f89080SBrooks Davis /* 170*f1f89080SBrooks Davis * The input stream is considered to be multibyte characters. 171*f1f89080SBrooks Davis * The input loop will read this data inputing one character, 172*f1f89080SBrooks Davis * possibly multiple bytes, at a time and converting each to 173*f1f89080SBrooks Davis * a wide character wchar_t. 174*f1f89080SBrooks Davis * 175*f1f89080SBrooks Davis * The vis(3) functions, however, require single either bytes 176*f1f89080SBrooks Davis * or a multibyte string as their arguments. So we convert 177*f1f89080SBrooks Davis * our input wchar_t and the following look-ahead wchar_t to 178*f1f89080SBrooks Davis * a multibyte string for processing by vis(3). 179*f1f89080SBrooks Davis */ 180*f1f89080SBrooks Davis 181*f1f89080SBrooks Davis /* Read one multibyte character, store as wchar_t */ 182*f1f89080SBrooks Davis c = getwc(fp); 183*f1f89080SBrooks Davis if (c == WEOF && errno == EILSEQ) { 184*f1f89080SBrooks Davis /* Error in multibyte data. Read one byte. */ 185*f1f89080SBrooks Davis c = (wint_t)getc(fp); 186*f1f89080SBrooks Davis cerr = 1; 187*f1f89080SBrooks Davis } 188*f1f89080SBrooks Davis while (c != WEOF) { 189*f1f89080SBrooks Davis /* Clear multibyte input buffer. */ 190*f1f89080SBrooks Davis memset(mbibuff, 0, sizeof(mbibuff)); 191*f1f89080SBrooks Davis /* Read-ahead next multibyte character. */ 192*f1f89080SBrooks Davis if (!cerr) 193*f1f89080SBrooks Davis rachar = getwc(fp); 194*f1f89080SBrooks Davis if (cerr || (rachar == WEOF && errno == EILSEQ)) { 195*f1f89080SBrooks Davis /* Error in multibyte data. Read one byte. */ 196*f1f89080SBrooks Davis rachar = (wint_t)getc(fp); 197*f1f89080SBrooks Davis raerr = 1; 198*f1f89080SBrooks Davis } 199*f1f89080SBrooks Davis if (none) { 200*f1f89080SBrooks Davis /* Handle -n flag. */ 201*f1f89080SBrooks Davis cp = buff; 202*f1f89080SBrooks Davis *cp++ = c; 203*f1f89080SBrooks Davis if (c == '\\') 204*f1f89080SBrooks Davis *cp++ = '\\'; 205*f1f89080SBrooks Davis *cp = '\0'; 206*f1f89080SBrooks Davis } else if (markeol && c == '\n') { 207*f1f89080SBrooks Davis /* Handle -l flag. */ 208*f1f89080SBrooks Davis cp = buff; 209*f1f89080SBrooks Davis if ((eflags & VIS_NOSLASH) == 0) 210*f1f89080SBrooks Davis *cp++ = '\\'; 211*f1f89080SBrooks Davis *cp++ = '$'; 212*f1f89080SBrooks Davis *cp++ = '\n'; 213*f1f89080SBrooks Davis *cp = '\0'; 214*f1f89080SBrooks Davis } else { 215*f1f89080SBrooks Davis /* 216*f1f89080SBrooks Davis * Convert character using vis(3) library. 217*f1f89080SBrooks Davis * At this point we will process one character. 218*f1f89080SBrooks Davis * But we must pass the vis(3) library this 219*f1f89080SBrooks Davis * character plus the next one because the next 220*f1f89080SBrooks Davis * one is used as a look-ahead to decide how to 221*f1f89080SBrooks Davis * encode this one under certain circumstances. 222*f1f89080SBrooks Davis * 223*f1f89080SBrooks Davis * Since our characters may be multibyte, e.g., 224*f1f89080SBrooks Davis * in the UTF-8 locale, we cannot use vis() and 225*f1f89080SBrooks Davis * svis() which require byte input, so we must 226*f1f89080SBrooks Davis * create a multibyte string and use strvisx(). 227*f1f89080SBrooks Davis */ 228*f1f89080SBrooks Davis /* Treat EOF as a NUL char. */ 229*f1f89080SBrooks Davis c1 = rachar; 230*f1f89080SBrooks Davis if (c1 == WEOF) 231*f1f89080SBrooks Davis c1 = L'\0'; 232*f1f89080SBrooks Davis /* 233*f1f89080SBrooks Davis * If we hit a multibyte conversion error above, 234*f1f89080SBrooks Davis * insert byte directly into string buff because 235*f1f89080SBrooks Davis * wctomb() will fail. Else convert wchar_t to 236*f1f89080SBrooks Davis * multibyte using wctomb(). 237*f1f89080SBrooks Davis */ 238*f1f89080SBrooks Davis if (cerr) { 239*f1f89080SBrooks Davis *mbibuff = (char)c; 240*f1f89080SBrooks Davis mbilen = 1; 241*f1f89080SBrooks Davis } else 242*f1f89080SBrooks Davis mbilen = wctomb(mbibuff, c); 243*f1f89080SBrooks Davis /* Same for look-ahead character. */ 244*f1f89080SBrooks Davis if (raerr) 245*f1f89080SBrooks Davis mbibuff[mbilen] = (char)c1; 246*f1f89080SBrooks Davis else 247*f1f89080SBrooks Davis wctomb(mbibuff + mbilen, c1); 248*f1f89080SBrooks Davis /* Perform encoding on just first character. */ 249*f1f89080SBrooks Davis (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff, 250*f1f89080SBrooks Davis 1, eflags, extra, &cerr); 251*f1f89080SBrooks Davis } 252*f1f89080SBrooks Davis 253*f1f89080SBrooks Davis cp = buff; 254*f1f89080SBrooks Davis if (fold) { 255*f1f89080SBrooks Davis #ifdef DEBUG 256*f1f89080SBrooks Davis if (debug) 257*f1f89080SBrooks Davis (void)printf("<%02d,", col); 258*f1f89080SBrooks Davis #endif 259*f1f89080SBrooks Davis col = foldit(cp, col, foldwidth, eflags); 260*f1f89080SBrooks Davis #ifdef DEBUG 261*f1f89080SBrooks Davis if (debug) 262*f1f89080SBrooks Davis (void)printf("%02d>", col); 263*f1f89080SBrooks Davis #endif 264*f1f89080SBrooks Davis } 265*f1f89080SBrooks Davis do { 266*f1f89080SBrooks Davis (void)putchar(*cp); 267*f1f89080SBrooks Davis } while (*++cp); 268*f1f89080SBrooks Davis c = rachar; 269*f1f89080SBrooks Davis cerr = raerr; 270*f1f89080SBrooks Davis } 271*f1f89080SBrooks Davis /* 272*f1f89080SBrooks Davis * terminate partial line with a hidden newline 273*f1f89080SBrooks Davis */ 274*f1f89080SBrooks Davis if (fold && *(cp - 1) != '\n') 275*f1f89080SBrooks Davis (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n"); 276*f1f89080SBrooks Davis } 277