1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1988, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 33 #ifndef lint 34 static const char copyright[] = 35 "@(#) Copyright (c) 1988, 1993\n\ 36 The Regents of the University of California. All rights reserved.\n"; 37 #endif 38 39 #ifndef lint 40 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 41 #endif 42 43 #include <sys/types.h> 44 #include <sys/capsicum.h> 45 46 #include <capsicum_helpers.h> 47 #include <ctype.h> 48 #include <err.h> 49 #include <limits.h> 50 #include <locale.h> 51 #include <stdint.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <unistd.h> 56 #include <wchar.h> 57 #include <wctype.h> 58 59 #include "cmap.h" 60 #include "cset.h" 61 #include "extern.h" 62 63 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 64 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 65 66 static struct cset *setup(char *, STR *, int, int); 67 static void usage(void) __dead2; 68 69 int 70 main(int argc, char **argv) 71 { 72 static int carray[NCHARS_SB]; 73 struct cmap *map; 74 struct cset *delete, *squeeze; 75 int n, *p; 76 int Cflag, cflag, dflag, sflag, isstring2; 77 wint_t ch, cnt, lastch; 78 79 (void)setlocale(LC_ALL, ""); 80 81 if (caph_limit_stdio() == -1) 82 err(1, "unable to limit stdio"); 83 84 if (caph_enter() < 0) 85 err(1, "unable to enter capability mode"); 86 87 Cflag = cflag = dflag = sflag = 0; 88 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 89 switch((char)ch) { 90 case 'C': 91 Cflag = 1; 92 cflag = 0; 93 break; 94 case 'c': 95 cflag = 1; 96 Cflag = 0; 97 break; 98 case 'd': 99 dflag = 1; 100 break; 101 case 's': 102 sflag = 1; 103 break; 104 case 'u': 105 setbuf(stdout, (char *)NULL); 106 break; 107 case '?': 108 default: 109 usage(); 110 } 111 argc -= optind; 112 argv += optind; 113 114 switch(argc) { 115 case 0: 116 default: 117 usage(); 118 /* NOTREACHED */ 119 case 1: 120 isstring2 = 0; 121 break; 122 case 2: 123 isstring2 = 1; 124 break; 125 } 126 127 /* 128 * tr -ds [-Cc] string1 string2 129 * Delete all characters (or complemented characters) in string1. 130 * Squeeze all characters in string2. 131 */ 132 if (dflag && sflag) { 133 if (!isstring2) 134 usage(); 135 136 delete = setup(argv[0], &s1, cflag, Cflag); 137 squeeze = setup(argv[1], &s2, 0, 0); 138 139 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 140 if (!cset_in(delete, ch) && 141 (lastch != ch || !cset_in(squeeze, ch))) { 142 lastch = ch; 143 (void)putwchar(ch); 144 } 145 if (ferror(stdin)) 146 err(1, NULL); 147 exit(0); 148 } 149 150 /* 151 * tr -d [-Cc] string1 152 * Delete all characters (or complemented characters) in string1. 153 */ 154 if (dflag) { 155 if (isstring2) 156 usage(); 157 158 delete = setup(argv[0], &s1, cflag, Cflag); 159 160 while ((ch = getwchar()) != WEOF) 161 if (!cset_in(delete, ch)) 162 (void)putwchar(ch); 163 if (ferror(stdin)) 164 err(1, NULL); 165 exit(0); 166 } 167 168 /* 169 * tr -s [-Cc] string1 170 * Squeeze all characters (or complemented characters) in string1. 171 */ 172 if (sflag && !isstring2) { 173 squeeze = setup(argv[0], &s1, cflag, Cflag); 174 175 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 176 if (lastch != ch || !cset_in(squeeze, ch)) { 177 lastch = ch; 178 (void)putwchar(ch); 179 } 180 if (ferror(stdin)) 181 err(1, NULL); 182 exit(0); 183 } 184 185 /* 186 * tr [-Ccs] string1 string2 187 * Replace all characters (or complemented characters) in string1 with 188 * the character in the same position in string2. If the -s option is 189 * specified, squeeze all the characters in string2. 190 */ 191 if (!isstring2) 192 usage(); 193 194 map = cmap_alloc(); 195 if (map == NULL) 196 err(1, NULL); 197 squeeze = cset_alloc(); 198 if (squeeze == NULL) 199 err(1, NULL); 200 201 s1.str = argv[0]; 202 203 if (Cflag || cflag) { 204 cmap_default(map, OOBCH); 205 if ((s2.str = strdup(argv[1])) == NULL) 206 errx(1, "strdup(argv[1])"); 207 } else 208 s2.str = argv[1]; 209 210 if (!next(&s2)) 211 errx(1, "empty string2"); 212 213 /* 214 * For -s result will contain only those characters defined 215 * as the second characters in each of the toupper or tolower 216 * pairs. 217 */ 218 219 /* If string2 runs out of characters, use the last one specified. */ 220 while (next(&s1)) { 221 again: 222 if (s1.state == CCLASS_LOWER && 223 s2.state == CCLASS_UPPER && 224 s1.cnt == 1 && s2.cnt == 1) { 225 do { 226 ch = towupper(s1.lastch); 227 cmap_add(map, s1.lastch, ch); 228 if (sflag && iswupper(ch)) 229 cset_add(squeeze, ch); 230 if (!next(&s1)) 231 goto endloop; 232 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 233 /* skip upper set */ 234 do { 235 if (!next(&s2)) 236 break; 237 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 238 goto again; 239 } else if (s1.state == CCLASS_UPPER && 240 s2.state == CCLASS_LOWER && 241 s1.cnt == 1 && s2.cnt == 1) { 242 do { 243 ch = towlower(s1.lastch); 244 cmap_add(map, s1.lastch, ch); 245 if (sflag && iswlower(ch)) 246 cset_add(squeeze, ch); 247 if (!next(&s1)) 248 goto endloop; 249 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 250 /* skip lower set */ 251 do { 252 if (!next(&s2)) 253 break; 254 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 255 goto again; 256 } else { 257 cmap_add(map, s1.lastch, s2.lastch); 258 if (sflag) 259 cset_add(squeeze, s2.lastch); 260 } 261 (void)next(&s2); 262 } 263 endloop: 264 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 265 /* 266 * This is somewhat tricky: since the character set is 267 * potentially huge, we need to avoid allocating a map 268 * entry for every character. Our strategy is to set the 269 * default mapping to the last character of string #2 270 * (= the one that gets automatically repeated), then to 271 * add back identity mappings for characters that should 272 * remain unchanged. We don't waste space on identity mappings 273 * for non-characters with the -C option; those are simulated 274 * in the I/O loop. 275 */ 276 s2.str = argv[1]; 277 s2.state = NORMAL; 278 for (cnt = 0; cnt < WINT_MAX; cnt++) { 279 if (Cflag && !iswrune(cnt)) 280 continue; 281 if (cmap_lookup(map, cnt) == OOBCH) { 282 if (next(&s2)) { 283 cmap_add(map, cnt, s2.lastch); 284 if (sflag) 285 cset_add(squeeze, s2.lastch); 286 } 287 } else 288 cmap_add(map, cnt, cnt); 289 if ((s2.state == EOS || s2.state == INFINITE) && 290 cnt >= cmap_max(map)) 291 break; 292 } 293 cmap_default(map, s2.lastch); 294 } else if (Cflag) { 295 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 296 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 297 *p++ = cnt; 298 else 299 cmap_add(map, cnt, cnt); 300 } 301 n = p - carray; 302 if (Cflag && n > 1) 303 (void)mergesort(carray, n, sizeof(*carray), charcoll); 304 305 s2.str = argv[1]; 306 s2.state = NORMAL; 307 for (cnt = 0; cnt < n; cnt++) { 308 (void)next(&s2); 309 cmap_add(map, carray[cnt], s2.lastch); 310 /* 311 * Chars taken from s2 can be different this time 312 * due to lack of complex upper/lower processing, 313 * so fill string2 again to not miss some. 314 */ 315 if (sflag) 316 cset_add(squeeze, s2.lastch); 317 } 318 } 319 320 cset_cache(squeeze); 321 cmap_cache(map); 322 323 if (sflag) 324 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 325 if (!Cflag || iswrune(ch)) 326 ch = cmap_lookup(map, ch); 327 if (lastch != ch || !cset_in(squeeze, ch)) { 328 lastch = ch; 329 (void)putwchar(ch); 330 } 331 } 332 else 333 while ((ch = getwchar()) != WEOF) { 334 if (!Cflag || iswrune(ch)) 335 ch = cmap_lookup(map, ch); 336 (void)putwchar(ch); 337 } 338 if (ferror(stdin)) 339 err(1, NULL); 340 exit (0); 341 } 342 343 static struct cset * 344 setup(char *arg, STR *str, int cflag, int Cflag) 345 { 346 struct cset *cs; 347 348 cs = cset_alloc(); 349 if (cs == NULL) 350 err(1, NULL); 351 str->str = arg; 352 while (next(str)) 353 cset_add(cs, str->lastch); 354 if (Cflag) 355 cset_addclass(cs, wctype("rune"), true); 356 if (cflag || Cflag) 357 cset_invert(cs); 358 cset_cache(cs); 359 return (cs); 360 } 361 362 int 363 charcoll(const void *a, const void *b) 364 { 365 static char sa[2], sb[2]; 366 367 sa[0] = *(const int *)a; 368 sb[0] = *(const int *)b; 369 return (strcoll(sa, sb)); 370 } 371 372 static void 373 usage(void) 374 { 375 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 376 "usage: tr [-Ccsu] string1 string2", 377 " tr [-Ccu] -d string1", 378 " tr [-Ccu] -s string1", 379 " tr [-Ccu] -ds string1 string2"); 380 exit(1); 381 } 382