1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 32 __FBSDID("$FreeBSD$"); 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1988, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 #ifndef lint 41 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 42 #endif 43 44 #include <sys/types.h> 45 #include <sys/capsicum.h> 46 47 #include <capsicum_helpers.h> 48 #include <ctype.h> 49 #include <err.h> 50 #include <limits.h> 51 #include <locale.h> 52 #include <stdint.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 #include <wchar.h> 58 #include <wctype.h> 59 60 #include "cmap.h" 61 #include "cset.h" 62 #include "extern.h" 63 64 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 65 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 66 67 static struct cset *setup(char *, STR *, int, int); 68 static void usage(void); 69 70 int 71 main(int argc, char **argv) 72 { 73 static int carray[NCHARS_SB]; 74 struct cmap *map; 75 struct cset *delete, *squeeze; 76 int n, *p; 77 int Cflag, cflag, dflag, sflag, isstring2; 78 wint_t ch, cnt, lastch; 79 80 (void)setlocale(LC_ALL, ""); 81 82 if (caph_limit_stdio() == -1) 83 err(1, "unable to limit stdio"); 84 85 if (cap_enter() < 0 && errno != ENOSYS) 86 err(1, "unable to enter capability mode"); 87 88 Cflag = cflag = dflag = sflag = 0; 89 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 90 switch((char)ch) { 91 case 'C': 92 Cflag = 1; 93 cflag = 0; 94 break; 95 case 'c': 96 cflag = 1; 97 Cflag = 0; 98 break; 99 case 'd': 100 dflag = 1; 101 break; 102 case 's': 103 sflag = 1; 104 break; 105 case 'u': 106 setbuf(stdout, (char *)NULL); 107 break; 108 case '?': 109 default: 110 usage(); 111 } 112 argc -= optind; 113 argv += optind; 114 115 switch(argc) { 116 case 0: 117 default: 118 usage(); 119 /* NOTREACHED */ 120 case 1: 121 isstring2 = 0; 122 break; 123 case 2: 124 isstring2 = 1; 125 break; 126 } 127 128 /* 129 * tr -ds [-Cc] string1 string2 130 * Delete all characters (or complemented characters) in string1. 131 * Squeeze all characters in string2. 132 */ 133 if (dflag && sflag) { 134 if (!isstring2) 135 usage(); 136 137 delete = setup(argv[0], &s1, cflag, Cflag); 138 squeeze = setup(argv[1], &s2, 0, 0); 139 140 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 141 if (!cset_in(delete, ch) && 142 (lastch != ch || !cset_in(squeeze, ch))) { 143 lastch = ch; 144 (void)putwchar(ch); 145 } 146 if (ferror(stdin)) 147 err(1, NULL); 148 exit(0); 149 } 150 151 /* 152 * tr -d [-Cc] string1 153 * Delete all characters (or complemented characters) in string1. 154 */ 155 if (dflag) { 156 if (isstring2) 157 usage(); 158 159 delete = setup(argv[0], &s1, cflag, Cflag); 160 161 while ((ch = getwchar()) != WEOF) 162 if (!cset_in(delete, ch)) 163 (void)putwchar(ch); 164 if (ferror(stdin)) 165 err(1, NULL); 166 exit(0); 167 } 168 169 /* 170 * tr -s [-Cc] string1 171 * Squeeze all characters (or complemented characters) in string1. 172 */ 173 if (sflag && !isstring2) { 174 squeeze = setup(argv[0], &s1, cflag, Cflag); 175 176 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 177 if (lastch != ch || !cset_in(squeeze, ch)) { 178 lastch = ch; 179 (void)putwchar(ch); 180 } 181 if (ferror(stdin)) 182 err(1, NULL); 183 exit(0); 184 } 185 186 /* 187 * tr [-Ccs] string1 string2 188 * Replace all characters (or complemented characters) in string1 with 189 * the character in the same position in string2. If the -s option is 190 * specified, squeeze all the characters in string2. 191 */ 192 if (!isstring2) 193 usage(); 194 195 map = cmap_alloc(); 196 if (map == NULL) 197 err(1, NULL); 198 squeeze = cset_alloc(); 199 if (squeeze == NULL) 200 err(1, NULL); 201 202 s1.str = argv[0]; 203 204 if (Cflag || cflag) { 205 cmap_default(map, OOBCH); 206 if ((s2.str = strdup(argv[1])) == NULL) 207 errx(1, "strdup(argv[1])"); 208 } else 209 s2.str = argv[1]; 210 211 if (!next(&s2)) 212 errx(1, "empty string2"); 213 214 /* 215 * For -s result will contain only those characters defined 216 * as the second characters in each of the toupper or tolower 217 * pairs. 218 */ 219 220 /* If string2 runs out of characters, use the last one specified. */ 221 while (next(&s1)) { 222 again: 223 if (s1.state == CCLASS_LOWER && 224 s2.state == CCLASS_UPPER && 225 s1.cnt == 1 && s2.cnt == 1) { 226 do { 227 ch = towupper(s1.lastch); 228 cmap_add(map, s1.lastch, ch); 229 if (sflag && iswupper(ch)) 230 cset_add(squeeze, ch); 231 if (!next(&s1)) 232 goto endloop; 233 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 234 /* skip upper set */ 235 do { 236 if (!next(&s2)) 237 break; 238 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 239 goto again; 240 } else if (s1.state == CCLASS_UPPER && 241 s2.state == CCLASS_LOWER && 242 s1.cnt == 1 && s2.cnt == 1) { 243 do { 244 ch = towlower(s1.lastch); 245 cmap_add(map, s1.lastch, ch); 246 if (sflag && iswlower(ch)) 247 cset_add(squeeze, ch); 248 if (!next(&s1)) 249 goto endloop; 250 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 251 /* skip lower set */ 252 do { 253 if (!next(&s2)) 254 break; 255 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 256 goto again; 257 } else { 258 cmap_add(map, s1.lastch, s2.lastch); 259 if (sflag) 260 cset_add(squeeze, s2.lastch); 261 } 262 (void)next(&s2); 263 } 264 endloop: 265 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 266 /* 267 * This is somewhat tricky: since the character set is 268 * potentially huge, we need to avoid allocating a map 269 * entry for every character. Our strategy is to set the 270 * default mapping to the last character of string #2 271 * (= the one that gets automatically repeated), then to 272 * add back identity mappings for characters that should 273 * remain unchanged. We don't waste space on identity mappings 274 * for non-characters with the -C option; those are simulated 275 * in the I/O loop. 276 */ 277 s2.str = argv[1]; 278 s2.state = NORMAL; 279 for (cnt = 0; cnt < WINT_MAX; cnt++) { 280 if (Cflag && !iswrune(cnt)) 281 continue; 282 if (cmap_lookup(map, cnt) == OOBCH) { 283 if (next(&s2)) { 284 cmap_add(map, cnt, s2.lastch); 285 if (sflag) 286 cset_add(squeeze, s2.lastch); 287 } 288 } else 289 cmap_add(map, cnt, cnt); 290 if ((s2.state == EOS || s2.state == INFINITE) && 291 cnt >= cmap_max(map)) 292 break; 293 } 294 cmap_default(map, s2.lastch); 295 } else if (Cflag) { 296 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 297 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 298 *p++ = cnt; 299 else 300 cmap_add(map, cnt, cnt); 301 } 302 n = p - carray; 303 if (Cflag && n > 1) 304 (void)mergesort(carray, n, sizeof(*carray), charcoll); 305 306 s2.str = argv[1]; 307 s2.state = NORMAL; 308 for (cnt = 0; cnt < n; cnt++) { 309 (void)next(&s2); 310 cmap_add(map, carray[cnt], s2.lastch); 311 /* 312 * Chars taken from s2 can be different this time 313 * due to lack of complex upper/lower processing, 314 * so fill string2 again to not miss some. 315 */ 316 if (sflag) 317 cset_add(squeeze, s2.lastch); 318 } 319 } 320 321 cset_cache(squeeze); 322 cmap_cache(map); 323 324 if (sflag) 325 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 326 if (!Cflag || iswrune(ch)) 327 ch = cmap_lookup(map, ch); 328 if (lastch != ch || !cset_in(squeeze, ch)) { 329 lastch = ch; 330 (void)putwchar(ch); 331 } 332 } 333 else 334 while ((ch = getwchar()) != WEOF) { 335 if (!Cflag || iswrune(ch)) 336 ch = cmap_lookup(map, ch); 337 (void)putwchar(ch); 338 } 339 if (ferror(stdin)) 340 err(1, NULL); 341 exit (0); 342 } 343 344 static struct cset * 345 setup(char *arg, STR *str, int cflag, int Cflag) 346 { 347 struct cset *cs; 348 349 cs = cset_alloc(); 350 if (cs == NULL) 351 err(1, NULL); 352 str->str = arg; 353 while (next(str)) 354 cset_add(cs, str->lastch); 355 if (Cflag) 356 cset_addclass(cs, wctype("rune"), true); 357 if (cflag || Cflag) 358 cset_invert(cs); 359 cset_cache(cs); 360 return (cs); 361 } 362 363 int 364 charcoll(const void *a, const void *b) 365 { 366 static char sa[2], sb[2]; 367 368 sa[0] = *(const int *)a; 369 sb[0] = *(const int *)b; 370 return (strcoll(sa, sb)); 371 } 372 373 static void 374 usage(void) 375 { 376 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 377 "usage: tr [-Ccsu] string1 string2", 378 " tr [-Ccu] -d string1", 379 " tr [-Ccu] -s string1", 380 " tr [-Ccu] -ds string1 string2"); 381 exit(1); 382 } 383