1 /* $NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 35 The Regents of the University of California. All rights reserved."); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; 41 #endif 42 __RCSID("$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $"); 43 #endif /* not lint */ 44 45 #include <stdio.h> 46 #include <string.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <errno.h> 50 #include <wchar.h> 51 #include <limits.h> 52 #include <unistd.h> 53 #include <err.h> 54 #include <vis.h> 55 56 #include "extern.h" 57 58 static int eflags, fold, foldwidth = 80, none, markeol; 59 #ifdef DEBUG 60 int debug; 61 #endif 62 static const char *extra = ""; 63 64 static void process(FILE *); 65 66 int 67 main(int argc, char *argv[]) 68 { 69 FILE *fp; 70 int ch; 71 int rval; 72 73 while ((ch = getopt(argc, argv, "bcde:F:fhlmnostw")) != -1) 74 switch((char)ch) { 75 case 'b': 76 eflags |= VIS_NOSLASH; 77 break; 78 case 'c': 79 eflags |= VIS_CSTYLE; 80 break; 81 #ifdef DEBUG 82 case 'd': 83 debug++; 84 break; 85 #endif 86 case 'e': 87 extra = optarg; 88 break; 89 case 'F': 90 if ((foldwidth = atoi(optarg)) < 5) { 91 errx(1, "can't fold lines to less than 5 cols"); 92 /* NOTREACHED */ 93 } 94 markeol++; 95 break; 96 case 'f': 97 fold++; /* fold output lines to 80 cols */ 98 break; /* using hidden newline */ 99 case 'h': 100 eflags |= VIS_HTTPSTYLE; 101 break; 102 case 'l': 103 markeol++; /* mark end of line with \$ */ 104 break; 105 case 'm': 106 eflags |= VIS_MIMESTYLE; 107 if (foldwidth == 80) 108 foldwidth = 76; 109 break; 110 case 'n': 111 none++; 112 break; 113 case 'o': 114 eflags |= VIS_OCTAL; 115 break; 116 case 's': 117 eflags |= VIS_SAFE; 118 break; 119 case 't': 120 eflags |= VIS_TAB; 121 break; 122 case 'w': 123 eflags |= VIS_WHITE; 124 break; 125 case '?': 126 default: 127 (void)fprintf(stderr, 128 "Usage: %s [-bcfhlmnostw] [-e extra]" 129 " [-F foldwidth] [file ...]\n", getprogname()); 130 return 1; 131 } 132 133 if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) == 134 (VIS_HTTPSTYLE|VIS_MIMESTYLE)) 135 errx(1, "Can't specify -m and -h at the same time"); 136 137 argc -= optind; 138 argv += optind; 139 140 rval = 0; 141 142 if (*argv) 143 while (*argv) { 144 if ((fp = fopen(*argv, "r")) != NULL) { 145 process(fp); 146 (void)fclose(fp); 147 } else { 148 warn("%s", *argv); 149 rval = 1; 150 } 151 argv++; 152 } 153 else 154 process(stdin); 155 return rval; 156 } 157 158 static void 159 process(FILE *fp) 160 { 161 static int col = 0; 162 static char nul[] = "\0"; 163 char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ 164 wint_t c, c1, rachar; 165 char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */ 166 char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */ 167 int mbilen, cerr = 0, raerr = 0; 168 169 /* 170 * The input stream is considered to be multibyte characters. 171 * The input loop will read this data inputing one character, 172 * possibly multiple bytes, at a time and converting each to 173 * a wide character wchar_t. 174 * 175 * The vis(3) functions, however, require single either bytes 176 * or a multibyte string as their arguments. So we convert 177 * our input wchar_t and the following look-ahead wchar_t to 178 * a multibyte string for processing by vis(3). 179 */ 180 181 /* Read one multibyte character, store as wchar_t */ 182 c = getwc(fp); 183 if (c == WEOF && errno == EILSEQ) { 184 /* Error in multibyte data. Read one byte. */ 185 c = (wint_t)getc(fp); 186 cerr = 1; 187 } 188 while (c != WEOF) { 189 /* Clear multibyte input buffer. */ 190 memset(mbibuff, 0, sizeof(mbibuff)); 191 /* Read-ahead next multibyte character. */ 192 if (!cerr) 193 rachar = getwc(fp); 194 if (cerr || (rachar == WEOF && errno == EILSEQ)) { 195 /* Error in multibyte data. Read one byte. */ 196 rachar = (wint_t)getc(fp); 197 raerr = 1; 198 } 199 if (none) { 200 /* Handle -n flag. */ 201 cp = buff; 202 *cp++ = c; 203 if (c == '\\') 204 *cp++ = '\\'; 205 *cp = '\0'; 206 } else if (markeol && c == '\n') { 207 /* Handle -l flag. */ 208 cp = buff; 209 if ((eflags & VIS_NOSLASH) == 0) 210 *cp++ = '\\'; 211 *cp++ = '$'; 212 *cp++ = '\n'; 213 *cp = '\0'; 214 } else { 215 /* 216 * Convert character using vis(3) library. 217 * At this point we will process one character. 218 * But we must pass the vis(3) library this 219 * character plus the next one because the next 220 * one is used as a look-ahead to decide how to 221 * encode this one under certain circumstances. 222 * 223 * Since our characters may be multibyte, e.g., 224 * in the UTF-8 locale, we cannot use vis() and 225 * svis() which require byte input, so we must 226 * create a multibyte string and use strvisx(). 227 */ 228 /* Treat EOF as a NUL char. */ 229 c1 = rachar; 230 if (c1 == WEOF) 231 c1 = L'\0'; 232 /* 233 * If we hit a multibyte conversion error above, 234 * insert byte directly into string buff because 235 * wctomb() will fail. Else convert wchar_t to 236 * multibyte using wctomb(). 237 */ 238 if (cerr) { 239 *mbibuff = (char)c; 240 mbilen = 1; 241 } else 242 mbilen = wctomb(mbibuff, c); 243 /* Same for look-ahead character. */ 244 if (raerr) 245 mbibuff[mbilen] = (char)c1; 246 else 247 wctomb(mbibuff + mbilen, c1); 248 /* Perform encoding on just first character. */ 249 (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff, 250 1, eflags, extra, &cerr); 251 } 252 253 cp = buff; 254 if (fold) { 255 #ifdef DEBUG 256 if (debug) 257 (void)printf("<%02d,", col); 258 #endif 259 col = foldit(cp, col, foldwidth, eflags); 260 #ifdef DEBUG 261 if (debug) 262 (void)printf("%02d>", col); 263 #endif 264 } 265 do { 266 (void)putchar(*cp); 267 } while (*++cp); 268 c = rachar; 269 cerr = raerr; 270 } 271 /* 272 * terminate partial line with a hidden newline 273 */ 274 if (fold && *(cp - 1) != '\n') 275 (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n"); 276 } 277