1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1988, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/capsicum.h> 34 35 #include <capsicum_helpers.h> 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdint.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include <wctype.h> 47 48 #include "cmap.h" 49 #include "cset.h" 50 #include "extern.h" 51 52 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 53 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 54 55 static struct cset *setup(char *, STR *, int, int); 56 static void usage(void) __dead2; 57 58 int 59 main(int argc, char **argv) 60 { 61 static int carray[NCHARS_SB]; 62 struct cmap *map; 63 struct cset *delete, *squeeze; 64 int n, *p; 65 int Cflag, cflag, dflag, sflag, isstring2; 66 wint_t ch, cnt, lastch; 67 68 (void)setlocale(LC_ALL, ""); 69 70 if (caph_limit_stdio() == -1) 71 err(1, "unable to limit stdio"); 72 73 if (caph_enter() < 0) 74 err(1, "unable to enter capability mode"); 75 76 Cflag = cflag = dflag = sflag = 0; 77 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 78 switch((char)ch) { 79 case 'C': 80 Cflag = 1; 81 cflag = 0; 82 break; 83 case 'c': 84 cflag = 1; 85 Cflag = 0; 86 break; 87 case 'd': 88 dflag = 1; 89 break; 90 case 's': 91 sflag = 1; 92 break; 93 case 'u': 94 setbuf(stdout, (char *)NULL); 95 break; 96 case '?': 97 default: 98 usage(); 99 } 100 argc -= optind; 101 argv += optind; 102 103 switch(argc) { 104 case 0: 105 default: 106 usage(); 107 /* NOTREACHED */ 108 case 1: 109 isstring2 = 0; 110 break; 111 case 2: 112 isstring2 = 1; 113 break; 114 } 115 116 /* 117 * tr -ds [-Cc] string1 string2 118 * Delete all characters (or complemented characters) in string1. 119 * Squeeze all characters in string2. 120 */ 121 if (dflag && sflag) { 122 if (!isstring2) 123 usage(); 124 125 delete = setup(argv[0], &s1, cflag, Cflag); 126 squeeze = setup(argv[1], &s2, 0, 0); 127 128 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 129 if (!cset_in(delete, ch) && 130 (lastch != ch || !cset_in(squeeze, ch))) { 131 lastch = ch; 132 (void)putwchar(ch); 133 } 134 if (ferror(stdin)) 135 err(1, NULL); 136 exit(0); 137 } 138 139 /* 140 * tr -d [-Cc] string1 141 * Delete all characters (or complemented characters) in string1. 142 */ 143 if (dflag) { 144 if (isstring2) 145 usage(); 146 147 delete = setup(argv[0], &s1, cflag, Cflag); 148 149 while ((ch = getwchar()) != WEOF) 150 if (!cset_in(delete, ch)) 151 (void)putwchar(ch); 152 if (ferror(stdin)) 153 err(1, NULL); 154 exit(0); 155 } 156 157 /* 158 * tr -s [-Cc] string1 159 * Squeeze all characters (or complemented characters) in string1. 160 */ 161 if (sflag && !isstring2) { 162 squeeze = setup(argv[0], &s1, cflag, Cflag); 163 164 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 165 if (lastch != ch || !cset_in(squeeze, ch)) { 166 lastch = ch; 167 (void)putwchar(ch); 168 } 169 if (ferror(stdin)) 170 err(1, NULL); 171 exit(0); 172 } 173 174 /* 175 * tr [-Ccs] string1 string2 176 * Replace all characters (or complemented characters) in string1 with 177 * the character in the same position in string2. If the -s option is 178 * specified, squeeze all the characters in string2. 179 */ 180 if (!isstring2) 181 usage(); 182 183 map = cmap_alloc(); 184 if (map == NULL) 185 err(1, NULL); 186 squeeze = cset_alloc(); 187 if (squeeze == NULL) 188 err(1, NULL); 189 190 s1.str = argv[0]; 191 192 if (Cflag || cflag) { 193 cmap_default(map, OOBCH); 194 if ((s2.str = strdup(argv[1])) == NULL) 195 errx(1, "strdup(argv[1])"); 196 } else 197 s2.str = argv[1]; 198 199 if (!next(&s2)) 200 errx(1, "empty string2"); 201 202 /* 203 * For -s result will contain only those characters defined 204 * as the second characters in each of the toupper or tolower 205 * pairs. 206 */ 207 208 /* If string2 runs out of characters, use the last one specified. */ 209 while (next(&s1)) { 210 again: 211 if (s1.state == CCLASS_LOWER && 212 s2.state == CCLASS_UPPER && 213 s1.cnt == 1 && s2.cnt == 1) { 214 do { 215 ch = towupper(s1.lastch); 216 cmap_add(map, s1.lastch, ch); 217 if (sflag && iswupper(ch)) 218 cset_add(squeeze, ch); 219 if (!next(&s1)) 220 goto endloop; 221 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 222 /* skip upper set */ 223 do { 224 if (!next(&s2)) 225 break; 226 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 227 goto again; 228 } else if (s1.state == CCLASS_UPPER && 229 s2.state == CCLASS_LOWER && 230 s1.cnt == 1 && s2.cnt == 1) { 231 do { 232 ch = towlower(s1.lastch); 233 cmap_add(map, s1.lastch, ch); 234 if (sflag && iswlower(ch)) 235 cset_add(squeeze, ch); 236 if (!next(&s1)) 237 goto endloop; 238 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 239 /* skip lower set */ 240 do { 241 if (!next(&s2)) 242 break; 243 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 244 goto again; 245 } else if (s1.state == CCLASS && 246 s2.state == CCLASS_UPPER && 247 s1.cnt == 1 && s2.cnt == 1) { 248 do { 249 ch = towupper(s1.lastch); 250 cmap_add(map, s1.lastch, ch); 251 if (sflag && iswupper(ch)) 252 cset_add(squeeze, ch); 253 if (!next(&s1)) 254 goto endloop; 255 } while (s1.state == CCLASS && s1.cnt > 1); 256 /* skip upper set */ 257 do { 258 if (!next(&s2)) 259 break; 260 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 261 goto again; 262 } else if (s1.state == CCLASS && 263 s2.state == CCLASS_LOWER && 264 s1.cnt == 1 && s2.cnt == 1) { 265 do { 266 ch = towlower(s1.lastch); 267 cmap_add(map, s1.lastch, ch); 268 if (sflag && iswlower(ch)) 269 cset_add(squeeze, ch); 270 if (!next(&s1)) 271 goto endloop; 272 } while (s1.state == CCLASS && s1.cnt > 1); 273 /* skip lower set */ 274 do { 275 if (!next(&s2)) 276 break; 277 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 278 goto again; 279 } else { 280 cmap_add(map, s1.lastch, s2.lastch); 281 if (sflag) 282 cset_add(squeeze, s2.lastch); 283 } 284 (void)next(&s2); 285 } 286 endloop: 287 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 288 /* 289 * This is somewhat tricky: since the character set is 290 * potentially huge, we need to avoid allocating a map 291 * entry for every character. Our strategy is to set the 292 * default mapping to the last character of string #2 293 * (= the one that gets automatically repeated), then to 294 * add back identity mappings for characters that should 295 * remain unchanged. We don't waste space on identity mappings 296 * for non-characters with the -C option; those are simulated 297 * in the I/O loop. 298 */ 299 s2.str = argv[1]; 300 s2.state = NORMAL; 301 for (cnt = 0; cnt < WINT_MAX; cnt++) { 302 if (Cflag && !iswrune(cnt)) 303 continue; 304 if (cmap_lookup(map, cnt) == OOBCH) { 305 if (next(&s2)) { 306 cmap_add(map, cnt, s2.lastch); 307 if (sflag) 308 cset_add(squeeze, s2.lastch); 309 } 310 } else 311 cmap_add(map, cnt, cnt); 312 if ((s2.state == EOS || s2.state == INFINITE) && 313 cnt >= cmap_max(map)) 314 break; 315 } 316 cmap_default(map, s2.lastch); 317 } else if (Cflag) { 318 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 319 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 320 *p++ = cnt; 321 else 322 cmap_add(map, cnt, cnt); 323 } 324 n = p - carray; 325 if (Cflag && n > 1) 326 (void)mergesort(carray, n, sizeof(*carray), charcoll); 327 328 s2.str = argv[1]; 329 s2.state = NORMAL; 330 for (cnt = 0; cnt < n; cnt++) { 331 (void)next(&s2); 332 cmap_add(map, carray[cnt], s2.lastch); 333 /* 334 * Chars taken from s2 can be different this time 335 * due to lack of complex upper/lower processing, 336 * so fill string2 again to not miss some. 337 */ 338 if (sflag) 339 cset_add(squeeze, s2.lastch); 340 } 341 } 342 343 cset_cache(squeeze); 344 cmap_cache(map); 345 346 if (sflag) 347 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 348 if (!Cflag || iswrune(ch)) 349 ch = cmap_lookup(map, ch); 350 if (lastch != ch || !cset_in(squeeze, ch)) { 351 lastch = ch; 352 (void)putwchar(ch); 353 } 354 } 355 else 356 while ((ch = getwchar()) != WEOF) { 357 if (!Cflag || iswrune(ch)) 358 ch = cmap_lookup(map, ch); 359 (void)putwchar(ch); 360 } 361 if (ferror(stdin)) 362 err(1, NULL); 363 exit (0); 364 } 365 366 static struct cset * 367 setup(char *arg, STR *str, int cflag, int Cflag) 368 { 369 struct cset *cs; 370 371 cs = cset_alloc(); 372 if (cs == NULL) 373 err(1, NULL); 374 str->str = arg; 375 while (next(str)) 376 cset_add(cs, str->lastch); 377 if (Cflag) 378 cset_addclass(cs, wctype("rune"), true); 379 if (cflag || Cflag) 380 cset_invert(cs); 381 cset_cache(cs); 382 return (cs); 383 } 384 385 int 386 charcoll(const void *a, const void *b) 387 { 388 static char sa[2], sb[2]; 389 390 sa[0] = *(const int *)a; 391 sb[0] = *(const int *)b; 392 return (strcoll(sa, sb)); 393 } 394 395 static void 396 usage(void) 397 { 398 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 399 "usage: tr [-Ccsu] string1 string2", 400 " tr [-Ccu] -d string1", 401 " tr [-Ccu] -s string1", 402 " tr [-Ccu] -ds string1 string2"); 403 exit(1); 404 } 405