1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/types.h> 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <wchar.h> 45 #include <wctype.h> 46 47 #include "cmap.h" 48 #include "cset.h" 49 #include "extern.h" 50 51 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 52 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 53 54 static struct cset *setup(char *, STR *, int, int); 55 static void usage(void); 56 57 static wint_t 58 cmap_lookup(struct cmap *cm, wint_t from) 59 { 60 61 if (from < CM_CACHE_SIZE && cm->cm_havecache) 62 return (cm->cm_cache[from]); 63 return (cmap_lookup_hard(cm, from)); 64 } 65 66 static wint_t 67 cmap_max(struct cmap *cm) 68 { 69 return (cm->cm_max); 70 } 71 72 static inline bool 73 cset_in(struct cset *cs, wchar_t ch) 74 { 75 76 if (ch < CS_CACHE_SIZE && cs->cs_havecache) 77 return (cs->cs_cache[ch]); 78 return (cset_in_hard(cs, ch)); 79 } 80 81 int 82 main(int argc, char **argv) 83 { 84 static int carray[NCHARS_SB]; 85 struct cmap *map; 86 struct cset *delete, *squeeze; 87 int n, *p; 88 int Cflag, cflag, dflag, sflag, isstring2; 89 wint_t ch, cnt, lastch; 90 91 (void) setlocale(LC_ALL, ""); 92 93 Cflag = cflag = dflag = sflag = 0; 94 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 95 switch ((char)ch) { 96 case 'C': 97 Cflag = 1; 98 cflag = 0; 99 break; 100 case 'c': 101 cflag = 1; 102 Cflag = 0; 103 break; 104 case 'd': 105 dflag = 1; 106 break; 107 case 's': 108 sflag = 1; 109 break; 110 case 'u': 111 setbuf(stdout, (char *)NULL); 112 break; 113 case '?': 114 default: 115 usage(); 116 } 117 argc -= optind; 118 argv += optind; 119 120 switch (argc) { 121 case 0: 122 default: 123 usage(); 124 /* NOTREACHED */ 125 case 1: 126 isstring2 = 0; 127 break; 128 case 2: 129 isstring2 = 1; 130 break; 131 } 132 133 /* 134 * tr -ds [-Cc] string1 string2 135 * Delete all characters (or complemented characters) in string1. 136 * Squeeze all characters in string2. 137 */ 138 if (dflag && sflag) { 139 if (!isstring2) 140 usage(); 141 142 delete = setup(argv[0], &s1, cflag, Cflag); 143 squeeze = setup(argv[1], &s2, 0, 0); 144 145 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) 146 if (!cset_in(delete, ch) && 147 (lastch != ch || !cset_in(squeeze, ch))) { 148 lastch = ch; 149 (void) putwchar(ch); 150 } 151 if (ferror(stdin)) 152 err(1, NULL); 153 exit(0); 154 } 155 156 /* 157 * tr -d [-Cc] string1 158 * Delete all characters (or complemented characters) in string1. 159 */ 160 if (dflag) { 161 if (isstring2) 162 usage(); 163 164 delete = setup(argv[0], &s1, cflag, Cflag); 165 166 while ((ch = getwchar()) != WEOF) 167 if (!cset_in(delete, ch)) 168 (void) putwchar(ch); 169 if (ferror(stdin)) 170 err(1, NULL); 171 exit(0); 172 } 173 174 /* 175 * tr -s [-Cc] string1 176 * Squeeze all characters (or complemented characters) in string1. 177 */ 178 if (sflag && !isstring2) { 179 squeeze = setup(argv[0], &s1, cflag, Cflag); 180 181 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) 182 if (lastch != ch || !cset_in(squeeze, ch)) { 183 lastch = ch; 184 (void) putwchar(ch); 185 } 186 if (ferror(stdin)) 187 err(1, NULL); 188 exit(0); 189 } 190 191 /* 192 * tr [-Ccs] string1 string2 193 * Replace all characters (or complemented characters) in string1 with 194 * the character in the same position in string2. If the -s option is 195 * specified, squeeze all the characters in string2. 196 */ 197 if (!isstring2) 198 usage(); 199 200 map = cmap_alloc(); 201 if (map == NULL) 202 err(1, NULL); 203 squeeze = cset_alloc(); 204 if (squeeze == NULL) 205 err(1, NULL); 206 207 s1.str = argv[0]; 208 209 if (Cflag || cflag) { 210 (void) cmap_default(map, OOBCH); 211 if ((s2.str = strdup(argv[1])) == NULL) 212 errx(1, "strdup(argv[1])"); 213 } else 214 s2.str = argv[1]; 215 216 if (!next(&s2)) 217 errx(1, "empty string2"); 218 219 /* 220 * For -s result will contain only those characters defined 221 * as the second characters in each of the toupper or tolower 222 * pairs. 223 */ 224 225 /* If string2 runs out of characters, use the last one specified. */ 226 while (next(&s1)) { 227 again: 228 if (s1.state == CCLASS_LOWER && 229 s2.state == CCLASS_UPPER && 230 s1.cnt == 1 && s2.cnt == 1) { 231 do { 232 ch = towupper(s1.lastch); 233 (void) cmap_add(map, s1.lastch, ch); 234 if (sflag && iswupper(ch)) 235 (void) cset_add(squeeze, ch); 236 if (!next(&s1)) 237 goto endloop; 238 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 239 /* skip upper set */ 240 do { 241 if (!next(&s2)) 242 break; 243 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 244 goto again; 245 } else if (s1.state == CCLASS_UPPER && 246 s2.state == CCLASS_LOWER && 247 s1.cnt == 1 && s2.cnt == 1) { 248 do { 249 ch = towlower(s1.lastch); 250 (void) cmap_add(map, s1.lastch, ch); 251 if (sflag && iswlower(ch)) 252 (void) cset_add(squeeze, ch); 253 if (!next(&s1)) 254 goto endloop; 255 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 256 /* skip lower set */ 257 do { 258 if (!next(&s2)) 259 break; 260 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 261 goto again; 262 } else { 263 (void) cmap_add(map, s1.lastch, s2.lastch); 264 if (sflag) 265 (void) cset_add(squeeze, s2.lastch); 266 } 267 (void) next(&s2); 268 } 269 endloop: 270 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 271 /* 272 * This is somewhat tricky: since the character set is 273 * potentially huge, we need to avoid allocating a map 274 * entry for every character. Our strategy is to set the 275 * default mapping to the last character of string #2 276 * (= the one that gets automatically repeated), then to 277 * add back identity mappings for characters that should 278 * remain unchanged. We don't waste space on identity mappings 279 * for non-characters with the -C option; those are simulated 280 * in the I/O loop. 281 */ 282 s2.str = argv[1]; 283 s2.state = NORMAL; 284 for (cnt = 0; cnt < WCHAR_MAX; cnt++) { 285 if (Cflag && !iswrune(cnt)) 286 continue; 287 if (cmap_lookup(map, cnt) == OOBCH) { 288 if (next(&s2)) 289 (void) cmap_add(map, cnt, s2.lastch); 290 if (sflag) 291 (void) cset_add(squeeze, s2.lastch); 292 } else 293 (void) cmap_add(map, cnt, cnt); 294 if ((s2.state == EOS || s2.state == INFINITE) && 295 cnt >= cmap_max(map)) 296 break; 297 } 298 (void) cmap_default(map, s2.lastch); 299 } else if (Cflag) { 300 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 301 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 302 *p++ = cnt; 303 else 304 (void) cmap_add(map, cnt, cnt); 305 } 306 n = p - carray; 307 if (Cflag && n > 1) 308 (void) qsort(carray, n, sizeof (*carray), charcoll); 309 310 s2.str = argv[1]; 311 s2.state = NORMAL; 312 for (cnt = 0; cnt < n; cnt++) { 313 (void) next(&s2); 314 (void) cmap_add(map, carray[cnt], s2.lastch); 315 /* 316 * Chars taken from s2 can be different this time 317 * due to lack of complex upper/lower processing, 318 * so fill string2 again to not miss some. 319 */ 320 if (sflag) 321 (void) cset_add(squeeze, s2.lastch); 322 } 323 } 324 325 cset_cache(squeeze); 326 cmap_cache(map); 327 328 if (sflag) 329 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) { 330 if (!Cflag || iswrune(ch)) 331 ch = cmap_lookup(map, ch); 332 if (lastch != ch || !cset_in(squeeze, ch)) { 333 lastch = ch; 334 (void) putwchar(ch); 335 } 336 } 337 else 338 while ((ch = getwchar()) != WEOF) { 339 if (!Cflag || iswrune(ch)) 340 ch = cmap_lookup(map, ch); 341 (void) putwchar(ch); 342 } 343 if (ferror(stdin)) 344 err(1, NULL); 345 exit(0); 346 } 347 348 static struct cset * 349 setup(char *arg, STR *str, int cflag, int Cflag) 350 { 351 struct cset *cs; 352 353 cs = cset_alloc(); 354 if (cs == NULL) 355 err(1, NULL); 356 str->str = arg; 357 while (next(str)) 358 (void) cset_add(cs, str->lastch); 359 if (Cflag) 360 (void) cset_addclass(cs, wctype("rune"), true); 361 if (cflag || Cflag) 362 cset_invert(cs); 363 cset_cache(cs); 364 return (cs); 365 } 366 367 int 368 charcoll(const void *a, const void *b) 369 { 370 static char sa[2], sb[2]; 371 372 sa[0] = *(const int *)a; 373 sb[0] = *(const int *)b; 374 return (strcoll(sa, sb)); 375 } 376 377 static void 378 usage(void) 379 { 380 (void) fprintf(stderr, "%s\n%s\n%s\n%s\n", 381 "usage: tr [-Ccsu] string1 string2", 382 " tr [-Ccu] -d string1", 383 " tr [-Ccu] -s string1", 384 " tr [-Ccu] -ds string1 string2"); 385 exit(1); 386 } 387