1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/types.h> 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <wchar.h> 45 #include <wctype.h> 46 47 #include "cmap.h" 48 #include "cset.h" 49 #include "extern.h" 50 51 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 52 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 53 54 static struct cset *setup(char *, STR *, int, int); 55 static void usage(void) __NORETURN; 56 57 static wint_t 58 cmap_lookup(struct cmap *cm, wint_t from) 59 { 60 61 if (from < CM_CACHE_SIZE && cm->cm_havecache) 62 return (cm->cm_cache[from]); 63 return (cmap_lookup_hard(cm, from)); 64 } 65 66 static wint_t 67 cmap_max(struct cmap *cm) 68 { 69 return (cm->cm_max); 70 } 71 72 static inline bool 73 cset_in(struct cset *cs, wchar_t ch) 74 { 75 76 if (ch < CS_CACHE_SIZE && cs->cs_havecache) 77 return (cs->cs_cache[ch]); 78 return (cset_in_hard(cs, ch)); 79 } 80 81 int 82 main(int argc, char **argv) 83 { 84 static int carray[NCHARS_SB]; 85 struct cmap *map; 86 struct cset *delete, *squeeze; 87 int n, *p; 88 int Cflag, cflag, dflag, sflag, isstring2; 89 wint_t ch, cnt, lastch; 90 int c; 91 92 (void) setlocale(LC_ALL, ""); 93 94 Cflag = cflag = dflag = sflag = 0; 95 while ((c = getopt(argc, argv, "Ccdsu")) != -1) 96 switch (c) { 97 case 'C': 98 Cflag = 1; 99 cflag = 0; 100 break; 101 case 'c': 102 cflag = 1; 103 Cflag = 0; 104 break; 105 case 'd': 106 dflag = 1; 107 break; 108 case 's': 109 sflag = 1; 110 break; 111 case 'u': 112 setbuf(stdout, (char *)NULL); 113 break; 114 case '?': 115 default: 116 usage(); 117 } 118 argc -= optind; 119 argv += optind; 120 121 switch (argc) { 122 case 0: 123 default: 124 usage(); 125 /* NOTREACHED */ 126 case 1: 127 isstring2 = 0; 128 break; 129 case 2: 130 isstring2 = 1; 131 break; 132 } 133 134 /* 135 * tr -ds [-Cc] string1 string2 136 * Delete all characters (or complemented characters) in string1. 137 * Squeeze all characters in string2. 138 */ 139 if (dflag && sflag) { 140 if (!isstring2) 141 usage(); 142 143 delete = setup(argv[0], &s1, cflag, Cflag); 144 squeeze = setup(argv[1], &s2, 0, 0); 145 146 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) 147 if (!cset_in(delete, ch) && 148 (lastch != ch || !cset_in(squeeze, ch))) { 149 lastch = ch; 150 (void) putwchar(ch); 151 } 152 if (ferror(stdin)) 153 err(1, NULL); 154 exit(0); 155 } 156 157 /* 158 * tr -d [-Cc] string1 159 * Delete all characters (or complemented characters) in string1. 160 */ 161 if (dflag) { 162 if (isstring2) 163 usage(); 164 165 delete = setup(argv[0], &s1, cflag, Cflag); 166 167 while ((ch = getwchar()) != WEOF) 168 if (!cset_in(delete, ch)) 169 (void) putwchar(ch); 170 if (ferror(stdin)) 171 err(1, NULL); 172 exit(0); 173 } 174 175 /* 176 * tr -s [-Cc] string1 177 * Squeeze all characters (or complemented characters) in string1. 178 */ 179 if (sflag && !isstring2) { 180 squeeze = setup(argv[0], &s1, cflag, Cflag); 181 182 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) 183 if (lastch != ch || !cset_in(squeeze, ch)) { 184 lastch = ch; 185 (void) putwchar(ch); 186 } 187 if (ferror(stdin)) 188 err(1, NULL); 189 exit(0); 190 } 191 192 /* 193 * tr [-Ccs] string1 string2 194 * Replace all characters (or complemented characters) in string1 with 195 * the character in the same position in string2. If the -s option is 196 * specified, squeeze all the characters in string2. 197 */ 198 if (!isstring2) 199 usage(); 200 201 map = cmap_alloc(); 202 if (map == NULL) 203 err(1, NULL); 204 squeeze = cset_alloc(); 205 if (squeeze == NULL) 206 err(1, NULL); 207 208 s1.str = argv[0]; 209 210 if (Cflag || cflag) { 211 (void) cmap_default(map, OOBCH); 212 if ((s2.str = strdup(argv[1])) == NULL) 213 errx(1, "strdup(argv[1])"); 214 } else 215 s2.str = argv[1]; 216 217 if (!next(&s2)) 218 errx(1, "empty string2"); 219 220 /* 221 * For -s result will contain only those characters defined 222 * as the second characters in each of the toupper or tolower 223 * pairs. 224 */ 225 226 /* If string2 runs out of characters, use the last one specified. */ 227 while (next(&s1)) { 228 again: 229 if (s1.state == CCLASS_LOWER && 230 s2.state == CCLASS_UPPER && 231 s1.cnt == 1 && s2.cnt == 1) { 232 do { 233 ch = towupper(s1.lastch); 234 (void) cmap_add(map, s1.lastch, ch); 235 if (sflag && iswupper(ch)) 236 (void) cset_add(squeeze, ch); 237 if (!next(&s1)) 238 goto endloop; 239 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 240 /* skip upper set */ 241 do { 242 if (!next(&s2)) 243 break; 244 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 245 goto again; 246 } else if (s1.state == CCLASS_UPPER && 247 s2.state == CCLASS_LOWER && 248 s1.cnt == 1 && s2.cnt == 1) { 249 do { 250 ch = towlower(s1.lastch); 251 (void) cmap_add(map, s1.lastch, ch); 252 if (sflag && iswlower(ch)) 253 (void) cset_add(squeeze, ch); 254 if (!next(&s1)) 255 goto endloop; 256 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 257 /* skip lower set */ 258 do { 259 if (!next(&s2)) 260 break; 261 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 262 goto again; 263 } else { 264 (void) cmap_add(map, s1.lastch, s2.lastch); 265 if (sflag) 266 (void) cset_add(squeeze, s2.lastch); 267 } 268 (void) next(&s2); 269 } 270 endloop: 271 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 272 /* 273 * This is somewhat tricky: since the character set is 274 * potentially huge, we need to avoid allocating a map 275 * entry for every character. Our strategy is to set the 276 * default mapping to the last character of string #2 277 * (= the one that gets automatically repeated), then to 278 * add back identity mappings for characters that should 279 * remain unchanged. We don't waste space on identity mappings 280 * for non-characters with the -C option; those are simulated 281 * in the I/O loop. 282 */ 283 s2.str = argv[1]; 284 s2.state = NORMAL; 285 for (cnt = 0; cnt < WCHAR_MAX; cnt++) { 286 if (Cflag && !iswrune(cnt)) 287 continue; 288 if (cmap_lookup(map, cnt) == OOBCH) { 289 if (next(&s2)) 290 (void) cmap_add(map, cnt, s2.lastch); 291 if (sflag) 292 (void) cset_add(squeeze, s2.lastch); 293 } else 294 (void) cmap_add(map, cnt, cnt); 295 if ((s2.state == EOS || s2.state == INFINITE) && 296 cnt >= cmap_max(map)) 297 break; 298 } 299 (void) cmap_default(map, s2.lastch); 300 } else if (Cflag) { 301 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 302 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 303 *p++ = cnt; 304 else 305 (void) cmap_add(map, cnt, cnt); 306 } 307 n = p - carray; 308 if (Cflag && n > 1) 309 (void) qsort(carray, n, sizeof (*carray), charcoll); 310 311 s2.str = argv[1]; 312 s2.state = NORMAL; 313 for (cnt = 0; cnt < n; cnt++) { 314 (void) next(&s2); 315 (void) cmap_add(map, carray[cnt], s2.lastch); 316 /* 317 * Chars taken from s2 can be different this time 318 * due to lack of complex upper/lower processing, 319 * so fill string2 again to not miss some. 320 */ 321 if (sflag) 322 (void) cset_add(squeeze, s2.lastch); 323 } 324 } 325 326 cset_cache(squeeze); 327 cmap_cache(map); 328 329 if (sflag) 330 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) { 331 if (!Cflag || iswrune(ch)) 332 ch = cmap_lookup(map, ch); 333 if (lastch != ch || !cset_in(squeeze, ch)) { 334 lastch = ch; 335 (void) putwchar(ch); 336 } 337 } 338 else 339 while ((ch = getwchar()) != WEOF) { 340 if (!Cflag || iswrune(ch)) 341 ch = cmap_lookup(map, ch); 342 (void) putwchar(ch); 343 } 344 if (ferror(stdin)) 345 err(1, NULL); 346 exit(0); 347 } 348 349 static struct cset * 350 setup(char *arg, STR *str, int cflag, int Cflag) 351 { 352 struct cset *cs; 353 354 cs = cset_alloc(); 355 if (cs == NULL) 356 err(1, NULL); 357 str->str = arg; 358 while (next(str)) 359 (void) cset_add(cs, str->lastch); 360 if (Cflag) 361 (void) cset_addclass(cs, wctype("rune"), true); 362 if (cflag || Cflag) 363 cset_invert(cs); 364 cset_cache(cs); 365 return (cs); 366 } 367 368 int 369 charcoll(const void *a, const void *b) 370 { 371 static char sa[2], sb[2]; 372 373 sa[0] = *(const int *)a; 374 sb[0] = *(const int *)b; 375 return (strcoll(sa, sb)); 376 } 377 378 static void 379 usage(void) 380 { 381 (void) fprintf(stderr, "%s\n%s\n%s\n%s\n", 382 "usage: tr [-Ccsu] string1 string2", 383 " tr [-Ccu] -d string1", 384 " tr [-Ccu] -s string1", 385 " tr [-Ccu] -ds string1 string2"); 386 exit(1); 387 } 388