1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 32 __FBSDID("$FreeBSD$"); 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1988, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 #ifndef lint 41 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 42 #endif 43 44 #include <sys/types.h> 45 #include <sys/capsicum.h> 46 47 #include <ctype.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <limits.h> 51 #include <locale.h> 52 #include <stdint.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <termios.h> 57 #include <unistd.h> 58 #include <wchar.h> 59 #include <wctype.h> 60 61 #include "cmap.h" 62 #include "cset.h" 63 #include "extern.h" 64 65 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 66 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 67 68 static struct cset *setup(char *, STR *, int, int); 69 static void usage(void); 70 71 int 72 main(int argc, char **argv) 73 { 74 static int carray[NCHARS_SB]; 75 cap_rights_t rights; 76 unsigned long cmd; 77 struct cmap *map; 78 struct cset *delete, *squeeze; 79 int n, *p; 80 int Cflag, cflag, dflag, sflag, isstring2; 81 wint_t ch, cnt, lastch; 82 83 (void)setlocale(LC_ALL, ""); 84 85 cap_rights_init(&rights, CAP_FSTAT, CAP_IOCTL, CAP_READ); 86 if (cap_rights_limit(STDIN_FILENO, &rights) < 0 && errno != ENOSYS) 87 err(1, "unable to limit rights for stdin"); 88 cap_rights_init(&rights, CAP_FSTAT, CAP_IOCTL, CAP_WRITE); 89 if (cap_rights_limit(STDOUT_FILENO, &rights) < 0 && errno != ENOSYS) 90 err(1, "unable to limit rights for stdout"); 91 if (cap_rights_limit(STDERR_FILENO, &rights) < 0 && errno != ENOSYS) 92 err(1, "unable to limit rights for stderr"); 93 94 /* Required for isatty(3). */ 95 cmd = TIOCGETA; 96 if (cap_ioctls_limit(STDIN_FILENO, &cmd, 1) < 0 && errno != ENOSYS) 97 err(1, "unable to limit ioctls for stdin"); 98 if (cap_ioctls_limit(STDOUT_FILENO, &cmd, 1) < 0 && errno != ENOSYS) 99 err(1, "unable to limit ioctls for stdout"); 100 if (cap_ioctls_limit(STDERR_FILENO, &cmd, 1) < 0 && errno != ENOSYS) 101 err(1, "unable to limit ioctls for stderr"); 102 103 if (cap_enter() < 0 && errno != ENOSYS) 104 err(1, "unable to enter capability mode"); 105 106 Cflag = cflag = dflag = sflag = 0; 107 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 108 switch((char)ch) { 109 case 'C': 110 Cflag = 1; 111 cflag = 0; 112 break; 113 case 'c': 114 cflag = 1; 115 Cflag = 0; 116 break; 117 case 'd': 118 dflag = 1; 119 break; 120 case 's': 121 sflag = 1; 122 break; 123 case 'u': 124 setbuf(stdout, (char *)NULL); 125 break; 126 case '?': 127 default: 128 usage(); 129 } 130 argc -= optind; 131 argv += optind; 132 133 switch(argc) { 134 case 0: 135 default: 136 usage(); 137 /* NOTREACHED */ 138 case 1: 139 isstring2 = 0; 140 break; 141 case 2: 142 isstring2 = 1; 143 break; 144 } 145 146 /* 147 * tr -ds [-Cc] string1 string2 148 * Delete all characters (or complemented characters) in string1. 149 * Squeeze all characters in string2. 150 */ 151 if (dflag && sflag) { 152 if (!isstring2) 153 usage(); 154 155 delete = setup(argv[0], &s1, cflag, Cflag); 156 squeeze = setup(argv[1], &s2, 0, 0); 157 158 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 159 if (!cset_in(delete, ch) && 160 (lastch != ch || !cset_in(squeeze, ch))) { 161 lastch = ch; 162 (void)putwchar(ch); 163 } 164 if (ferror(stdin)) 165 err(1, NULL); 166 exit(0); 167 } 168 169 /* 170 * tr -d [-Cc] string1 171 * Delete all characters (or complemented characters) in string1. 172 */ 173 if (dflag) { 174 if (isstring2) 175 usage(); 176 177 delete = setup(argv[0], &s1, cflag, Cflag); 178 179 while ((ch = getwchar()) != WEOF) 180 if (!cset_in(delete, ch)) 181 (void)putwchar(ch); 182 if (ferror(stdin)) 183 err(1, NULL); 184 exit(0); 185 } 186 187 /* 188 * tr -s [-Cc] string1 189 * Squeeze all characters (or complemented characters) in string1. 190 */ 191 if (sflag && !isstring2) { 192 squeeze = setup(argv[0], &s1, cflag, Cflag); 193 194 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 195 if (lastch != ch || !cset_in(squeeze, ch)) { 196 lastch = ch; 197 (void)putwchar(ch); 198 } 199 if (ferror(stdin)) 200 err(1, NULL); 201 exit(0); 202 } 203 204 /* 205 * tr [-Ccs] string1 string2 206 * Replace all characters (or complemented characters) in string1 with 207 * the character in the same position in string2. If the -s option is 208 * specified, squeeze all the characters in string2. 209 */ 210 if (!isstring2) 211 usage(); 212 213 map = cmap_alloc(); 214 if (map == NULL) 215 err(1, NULL); 216 squeeze = cset_alloc(); 217 if (squeeze == NULL) 218 err(1, NULL); 219 220 s1.str = argv[0]; 221 222 if (Cflag || cflag) { 223 cmap_default(map, OOBCH); 224 if ((s2.str = strdup(argv[1])) == NULL) 225 errx(1, "strdup(argv[1])"); 226 } else 227 s2.str = argv[1]; 228 229 if (!next(&s2)) 230 errx(1, "empty string2"); 231 232 /* 233 * For -s result will contain only those characters defined 234 * as the second characters in each of the toupper or tolower 235 * pairs. 236 */ 237 238 /* If string2 runs out of characters, use the last one specified. */ 239 while (next(&s1)) { 240 again: 241 if (s1.state == CCLASS_LOWER && 242 s2.state == CCLASS_UPPER && 243 s1.cnt == 1 && s2.cnt == 1) { 244 do { 245 ch = towupper(s1.lastch); 246 cmap_add(map, s1.lastch, ch); 247 if (sflag && iswupper(ch)) 248 cset_add(squeeze, ch); 249 if (!next(&s1)) 250 goto endloop; 251 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 252 /* skip upper set */ 253 do { 254 if (!next(&s2)) 255 break; 256 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 257 goto again; 258 } else if (s1.state == CCLASS_UPPER && 259 s2.state == CCLASS_LOWER && 260 s1.cnt == 1 && s2.cnt == 1) { 261 do { 262 ch = towlower(s1.lastch); 263 cmap_add(map, s1.lastch, ch); 264 if (sflag && iswlower(ch)) 265 cset_add(squeeze, ch); 266 if (!next(&s1)) 267 goto endloop; 268 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 269 /* skip lower set */ 270 do { 271 if (!next(&s2)) 272 break; 273 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 274 goto again; 275 } else { 276 cmap_add(map, s1.lastch, s2.lastch); 277 if (sflag) 278 cset_add(squeeze, s2.lastch); 279 } 280 (void)next(&s2); 281 } 282 endloop: 283 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 284 /* 285 * This is somewhat tricky: since the character set is 286 * potentially huge, we need to avoid allocating a map 287 * entry for every character. Our strategy is to set the 288 * default mapping to the last character of string #2 289 * (= the one that gets automatically repeated), then to 290 * add back identity mappings for characters that should 291 * remain unchanged. We don't waste space on identity mappings 292 * for non-characters with the -C option; those are simulated 293 * in the I/O loop. 294 */ 295 s2.str = argv[1]; 296 s2.state = NORMAL; 297 for (cnt = 0; cnt < WINT_MAX; cnt++) { 298 if (Cflag && !iswrune(cnt)) 299 continue; 300 if (cmap_lookup(map, cnt) == OOBCH) { 301 if (next(&s2)) { 302 cmap_add(map, cnt, s2.lastch); 303 if (sflag) 304 cset_add(squeeze, s2.lastch); 305 } 306 } else 307 cmap_add(map, cnt, cnt); 308 if ((s2.state == EOS || s2.state == INFINITE) && 309 cnt >= cmap_max(map)) 310 break; 311 } 312 cmap_default(map, s2.lastch); 313 } else if (Cflag) { 314 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 315 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 316 *p++ = cnt; 317 else 318 cmap_add(map, cnt, cnt); 319 } 320 n = p - carray; 321 if (Cflag && n > 1) 322 (void)mergesort(carray, n, sizeof(*carray), charcoll); 323 324 s2.str = argv[1]; 325 s2.state = NORMAL; 326 for (cnt = 0; cnt < n; cnt++) { 327 (void)next(&s2); 328 cmap_add(map, carray[cnt], s2.lastch); 329 /* 330 * Chars taken from s2 can be different this time 331 * due to lack of complex upper/lower processing, 332 * so fill string2 again to not miss some. 333 */ 334 if (sflag) 335 cset_add(squeeze, s2.lastch); 336 } 337 } 338 339 cset_cache(squeeze); 340 cmap_cache(map); 341 342 if (sflag) 343 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 344 if (!Cflag || iswrune(ch)) 345 ch = cmap_lookup(map, ch); 346 if (lastch != ch || !cset_in(squeeze, ch)) { 347 lastch = ch; 348 (void)putwchar(ch); 349 } 350 } 351 else 352 while ((ch = getwchar()) != WEOF) { 353 if (!Cflag || iswrune(ch)) 354 ch = cmap_lookup(map, ch); 355 (void)putwchar(ch); 356 } 357 if (ferror(stdin)) 358 err(1, NULL); 359 exit (0); 360 } 361 362 static struct cset * 363 setup(char *arg, STR *str, int cflag, int Cflag) 364 { 365 struct cset *cs; 366 367 cs = cset_alloc(); 368 if (cs == NULL) 369 err(1, NULL); 370 str->str = arg; 371 while (next(str)) 372 cset_add(cs, str->lastch); 373 if (Cflag) 374 cset_addclass(cs, wctype("rune"), true); 375 if (cflag || Cflag) 376 cset_invert(cs); 377 cset_cache(cs); 378 return (cs); 379 } 380 381 int 382 charcoll(const void *a, const void *b) 383 { 384 static char sa[2], sb[2]; 385 386 sa[0] = *(const int *)a; 387 sb[0] = *(const int *)b; 388 return (strcoll(sa, sb)); 389 } 390 391 static void 392 usage(void) 393 { 394 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 395 "usage: tr [-Ccsu] string1 string2", 396 " tr [-Ccu] -d string1", 397 " tr [-Ccu] -s string1", 398 " tr [-Ccu] -ds string1 string2"); 399 exit(1); 400 } 401