1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1988, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 34 __FBSDID("$FreeBSD$"); 35 36 #ifndef lint 37 static const char copyright[] = 38 "@(#) Copyright (c) 1988, 1993\n\ 39 The Regents of the University of California. All rights reserved.\n"; 40 #endif 41 42 #ifndef lint 43 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 44 #endif 45 46 #include <sys/types.h> 47 #include <sys/capsicum.h> 48 49 #include <capsicum_helpers.h> 50 #include <ctype.h> 51 #include <err.h> 52 #include <limits.h> 53 #include <locale.h> 54 #include <stdint.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <wchar.h> 60 #include <wctype.h> 61 62 #include "cmap.h" 63 #include "cset.h" 64 #include "extern.h" 65 66 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 67 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 68 69 static struct cset *setup(char *, STR *, int, int); 70 static void usage(void) __dead2; 71 72 int 73 main(int argc, char **argv) 74 { 75 static int carray[NCHARS_SB]; 76 struct cmap *map; 77 struct cset *delete, *squeeze; 78 int n, *p; 79 int Cflag, cflag, dflag, sflag, isstring2; 80 wint_t ch, cnt, lastch; 81 82 (void)setlocale(LC_ALL, ""); 83 84 if (caph_limit_stdio() == -1) 85 err(1, "unable to limit stdio"); 86 87 if (caph_enter() < 0) 88 err(1, "unable to enter capability mode"); 89 90 Cflag = cflag = dflag = sflag = 0; 91 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 92 switch((char)ch) { 93 case 'C': 94 Cflag = 1; 95 cflag = 0; 96 break; 97 case 'c': 98 cflag = 1; 99 Cflag = 0; 100 break; 101 case 'd': 102 dflag = 1; 103 break; 104 case 's': 105 sflag = 1; 106 break; 107 case 'u': 108 setbuf(stdout, (char *)NULL); 109 break; 110 case '?': 111 default: 112 usage(); 113 } 114 argc -= optind; 115 argv += optind; 116 117 switch(argc) { 118 case 0: 119 default: 120 usage(); 121 /* NOTREACHED */ 122 case 1: 123 isstring2 = 0; 124 break; 125 case 2: 126 isstring2 = 1; 127 break; 128 } 129 130 /* 131 * tr -ds [-Cc] string1 string2 132 * Delete all characters (or complemented characters) in string1. 133 * Squeeze all characters in string2. 134 */ 135 if (dflag && sflag) { 136 if (!isstring2) 137 usage(); 138 139 delete = setup(argv[0], &s1, cflag, Cflag); 140 squeeze = setup(argv[1], &s2, 0, 0); 141 142 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 143 if (!cset_in(delete, ch) && 144 (lastch != ch || !cset_in(squeeze, ch))) { 145 lastch = ch; 146 (void)putwchar(ch); 147 } 148 if (ferror(stdin)) 149 err(1, NULL); 150 exit(0); 151 } 152 153 /* 154 * tr -d [-Cc] string1 155 * Delete all characters (or complemented characters) in string1. 156 */ 157 if (dflag) { 158 if (isstring2) 159 usage(); 160 161 delete = setup(argv[0], &s1, cflag, Cflag); 162 163 while ((ch = getwchar()) != WEOF) 164 if (!cset_in(delete, ch)) 165 (void)putwchar(ch); 166 if (ferror(stdin)) 167 err(1, NULL); 168 exit(0); 169 } 170 171 /* 172 * tr -s [-Cc] string1 173 * Squeeze all characters (or complemented characters) in string1. 174 */ 175 if (sflag && !isstring2) { 176 squeeze = setup(argv[0], &s1, cflag, Cflag); 177 178 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 179 if (lastch != ch || !cset_in(squeeze, ch)) { 180 lastch = ch; 181 (void)putwchar(ch); 182 } 183 if (ferror(stdin)) 184 err(1, NULL); 185 exit(0); 186 } 187 188 /* 189 * tr [-Ccs] string1 string2 190 * Replace all characters (or complemented characters) in string1 with 191 * the character in the same position in string2. If the -s option is 192 * specified, squeeze all the characters in string2. 193 */ 194 if (!isstring2) 195 usage(); 196 197 map = cmap_alloc(); 198 if (map == NULL) 199 err(1, NULL); 200 squeeze = cset_alloc(); 201 if (squeeze == NULL) 202 err(1, NULL); 203 204 s1.str = argv[0]; 205 206 if (Cflag || cflag) { 207 cmap_default(map, OOBCH); 208 if ((s2.str = strdup(argv[1])) == NULL) 209 errx(1, "strdup(argv[1])"); 210 } else 211 s2.str = argv[1]; 212 213 if (!next(&s2)) 214 errx(1, "empty string2"); 215 216 /* 217 * For -s result will contain only those characters defined 218 * as the second characters in each of the toupper or tolower 219 * pairs. 220 */ 221 222 /* If string2 runs out of characters, use the last one specified. */ 223 while (next(&s1)) { 224 again: 225 if (s1.state == CCLASS_LOWER && 226 s2.state == CCLASS_UPPER && 227 s1.cnt == 1 && s2.cnt == 1) { 228 do { 229 ch = towupper(s1.lastch); 230 cmap_add(map, s1.lastch, ch); 231 if (sflag && iswupper(ch)) 232 cset_add(squeeze, ch); 233 if (!next(&s1)) 234 goto endloop; 235 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 236 /* skip upper set */ 237 do { 238 if (!next(&s2)) 239 break; 240 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 241 goto again; 242 } else if (s1.state == CCLASS_UPPER && 243 s2.state == CCLASS_LOWER && 244 s1.cnt == 1 && s2.cnt == 1) { 245 do { 246 ch = towlower(s1.lastch); 247 cmap_add(map, s1.lastch, ch); 248 if (sflag && iswlower(ch)) 249 cset_add(squeeze, ch); 250 if (!next(&s1)) 251 goto endloop; 252 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 253 /* skip lower set */ 254 do { 255 if (!next(&s2)) 256 break; 257 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 258 goto again; 259 } else { 260 cmap_add(map, s1.lastch, s2.lastch); 261 if (sflag) 262 cset_add(squeeze, s2.lastch); 263 } 264 (void)next(&s2); 265 } 266 endloop: 267 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 268 /* 269 * This is somewhat tricky: since the character set is 270 * potentially huge, we need to avoid allocating a map 271 * entry for every character. Our strategy is to set the 272 * default mapping to the last character of string #2 273 * (= the one that gets automatically repeated), then to 274 * add back identity mappings for characters that should 275 * remain unchanged. We don't waste space on identity mappings 276 * for non-characters with the -C option; those are simulated 277 * in the I/O loop. 278 */ 279 s2.str = argv[1]; 280 s2.state = NORMAL; 281 for (cnt = 0; cnt < WINT_MAX; cnt++) { 282 if (Cflag && !iswrune(cnt)) 283 continue; 284 if (cmap_lookup(map, cnt) == OOBCH) { 285 if (next(&s2)) { 286 cmap_add(map, cnt, s2.lastch); 287 if (sflag) 288 cset_add(squeeze, s2.lastch); 289 } 290 } else 291 cmap_add(map, cnt, cnt); 292 if ((s2.state == EOS || s2.state == INFINITE) && 293 cnt >= cmap_max(map)) 294 break; 295 } 296 cmap_default(map, s2.lastch); 297 } else if (Cflag) { 298 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 299 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 300 *p++ = cnt; 301 else 302 cmap_add(map, cnt, cnt); 303 } 304 n = p - carray; 305 if (Cflag && n > 1) 306 (void)mergesort(carray, n, sizeof(*carray), charcoll); 307 308 s2.str = argv[1]; 309 s2.state = NORMAL; 310 for (cnt = 0; cnt < n; cnt++) { 311 (void)next(&s2); 312 cmap_add(map, carray[cnt], s2.lastch); 313 /* 314 * Chars taken from s2 can be different this time 315 * due to lack of complex upper/lower processing, 316 * so fill string2 again to not miss some. 317 */ 318 if (sflag) 319 cset_add(squeeze, s2.lastch); 320 } 321 } 322 323 cset_cache(squeeze); 324 cmap_cache(map); 325 326 if (sflag) 327 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 328 if (!Cflag || iswrune(ch)) 329 ch = cmap_lookup(map, ch); 330 if (lastch != ch || !cset_in(squeeze, ch)) { 331 lastch = ch; 332 (void)putwchar(ch); 333 } 334 } 335 else 336 while ((ch = getwchar()) != WEOF) { 337 if (!Cflag || iswrune(ch)) 338 ch = cmap_lookup(map, ch); 339 (void)putwchar(ch); 340 } 341 if (ferror(stdin)) 342 err(1, NULL); 343 exit (0); 344 } 345 346 static struct cset * 347 setup(char *arg, STR *str, int cflag, int Cflag) 348 { 349 struct cset *cs; 350 351 cs = cset_alloc(); 352 if (cs == NULL) 353 err(1, NULL); 354 str->str = arg; 355 while (next(str)) 356 cset_add(cs, str->lastch); 357 if (Cflag) 358 cset_addclass(cs, wctype("rune"), true); 359 if (cflag || Cflag) 360 cset_invert(cs); 361 cset_cache(cs); 362 return (cs); 363 } 364 365 int 366 charcoll(const void *a, const void *b) 367 { 368 static char sa[2], sb[2]; 369 370 sa[0] = *(const int *)a; 371 sb[0] = *(const int *)b; 372 return (strcoll(sa, sb)); 373 } 374 375 static void 376 usage(void) 377 { 378 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 379 "usage: tr [-Ccsu] string1 string2", 380 " tr [-Ccu] -d string1", 381 " tr [-Ccu] -s string1", 382 " tr [-Ccu] -ds string1 string2"); 383 exit(1); 384 } 385