1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 32 __FBSDID("$FreeBSD$"); 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1988, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 #ifndef lint 41 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 42 #endif 43 44 #include <sys/types.h> 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdint.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <unistd.h> 55 #include <wchar.h> 56 #include <wctype.h> 57 58 #include "cmap.h" 59 #include "cset.h" 60 #include "extern.h" 61 62 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 63 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 64 65 static struct cset *setup(char *, STR *, int, int); 66 static void usage(void); 67 68 int 69 main(int argc, char **argv) 70 { 71 static int carray[NCHARS_SB]; 72 struct cmap *map; 73 struct cset *delete, *squeeze; 74 int n, *p; 75 int Cflag, cflag, dflag, sflag, isstring2; 76 wint_t ch, cnt, lastch; 77 78 (void)setlocale(LC_ALL, ""); 79 80 Cflag = cflag = dflag = sflag = 0; 81 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 82 switch((char)ch) { 83 case 'C': 84 Cflag = 1; 85 cflag = 0; 86 break; 87 case 'c': 88 cflag = 1; 89 Cflag = 0; 90 break; 91 case 'd': 92 dflag = 1; 93 break; 94 case 's': 95 sflag = 1; 96 break; 97 case 'u': 98 setbuf(stdout, (char *)NULL); 99 break; 100 case '?': 101 default: 102 usage(); 103 } 104 argc -= optind; 105 argv += optind; 106 107 switch(argc) { 108 case 0: 109 default: 110 usage(); 111 /* NOTREACHED */ 112 case 1: 113 isstring2 = 0; 114 break; 115 case 2: 116 isstring2 = 1; 117 break; 118 } 119 120 /* 121 * tr -ds [-Cc] string1 string2 122 * Delete all characters (or complemented characters) in string1. 123 * Squeeze all characters in string2. 124 */ 125 if (dflag && sflag) { 126 if (!isstring2) 127 usage(); 128 129 delete = setup(argv[0], &s1, cflag, Cflag); 130 squeeze = setup(argv[1], &s2, 0, 0); 131 132 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 133 if (!cset_in(delete, ch) && 134 (lastch != ch || !cset_in(squeeze, ch))) { 135 lastch = ch; 136 (void)putwchar(ch); 137 } 138 if (ferror(stdin)) 139 err(1, NULL); 140 exit(0); 141 } 142 143 /* 144 * tr -d [-Cc] string1 145 * Delete all characters (or complemented characters) in string1. 146 */ 147 if (dflag) { 148 if (isstring2) 149 usage(); 150 151 delete = setup(argv[0], &s1, cflag, Cflag); 152 153 while ((ch = getwchar()) != WEOF) 154 if (!cset_in(delete, ch)) 155 (void)putwchar(ch); 156 if (ferror(stdin)) 157 err(1, NULL); 158 exit(0); 159 } 160 161 /* 162 * tr -s [-Cc] string1 163 * Squeeze all characters (or complemented characters) in string1. 164 */ 165 if (sflag && !isstring2) { 166 squeeze = setup(argv[0], &s1, cflag, Cflag); 167 168 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 169 if (lastch != ch || !cset_in(squeeze, ch)) { 170 lastch = ch; 171 (void)putwchar(ch); 172 } 173 if (ferror(stdin)) 174 err(1, NULL); 175 exit(0); 176 } 177 178 /* 179 * tr [-Ccs] string1 string2 180 * Replace all characters (or complemented characters) in string1 with 181 * the character in the same position in string2. If the -s option is 182 * specified, squeeze all the characters in string2. 183 */ 184 if (!isstring2) 185 usage(); 186 187 map = cmap_alloc(); 188 if (map == NULL) 189 err(1, NULL); 190 squeeze = cset_alloc(); 191 if (squeeze == NULL) 192 err(1, NULL); 193 194 s1.str = argv[0]; 195 196 if (Cflag || cflag) { 197 cmap_default(map, OOBCH); 198 if ((s2.str = strdup(argv[1])) == NULL) 199 errx(1, "strdup(argv[1])"); 200 } else 201 s2.str = argv[1]; 202 203 if (!next(&s2)) 204 errx(1, "empty string2"); 205 206 /* 207 * For -s result will contain only those characters defined 208 * as the second characters in each of the toupper or tolower 209 * pairs. 210 */ 211 212 /* If string2 runs out of characters, use the last one specified. */ 213 while (next(&s1)) { 214 again: 215 if (s1.state == CCLASS_LOWER && 216 s2.state == CCLASS_UPPER && 217 s1.cnt == 1 && s2.cnt == 1) { 218 do { 219 ch = towupper(s1.lastch); 220 cmap_add(map, s1.lastch, ch); 221 if (sflag && iswupper(ch)) 222 cset_add(squeeze, ch); 223 if (!next(&s1)) 224 goto endloop; 225 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 226 /* skip upper set */ 227 do { 228 if (!next(&s2)) 229 break; 230 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 231 goto again; 232 } else if (s1.state == CCLASS_UPPER && 233 s2.state == CCLASS_LOWER && 234 s1.cnt == 1 && s2.cnt == 1) { 235 do { 236 ch = towlower(s1.lastch); 237 cmap_add(map, s1.lastch, ch); 238 if (sflag && iswlower(ch)) 239 cset_add(squeeze, ch); 240 if (!next(&s1)) 241 goto endloop; 242 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 243 /* skip lower set */ 244 do { 245 if (!next(&s2)) 246 break; 247 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 248 goto again; 249 } else { 250 cmap_add(map, s1.lastch, s2.lastch); 251 if (sflag) 252 cset_add(squeeze, s2.lastch); 253 } 254 (void)next(&s2); 255 } 256 endloop: 257 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 258 /* 259 * This is somewhat tricky: since the character set is 260 * potentially huge, we need to avoid allocating a map 261 * entry for every character. Our strategy is to set the 262 * default mapping to the last character of string #2 263 * (= the one that gets automatically repeated), then to 264 * add back identity mappings for characters that should 265 * remain unchanged. We don't waste space on identity mappings 266 * for non-characters with the -C option; those are simulated 267 * in the I/O loop. 268 */ 269 s2.str = argv[1]; 270 s2.state = NORMAL; 271 for (cnt = 0; cnt < WINT_MAX; cnt++) { 272 if (Cflag && !iswrune(cnt)) 273 continue; 274 if (cmap_lookup(map, cnt) == OOBCH) { 275 if (next(&s2)) { 276 cmap_add(map, cnt, s2.lastch); 277 if (sflag) 278 cset_add(squeeze, s2.lastch); 279 } 280 } else 281 cmap_add(map, cnt, cnt); 282 if ((s2.state == EOS || s2.state == INFINITE) && 283 cnt >= cmap_max(map)) 284 break; 285 } 286 cmap_default(map, s2.lastch); 287 } else if (Cflag) { 288 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 289 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 290 *p++ = cnt; 291 else 292 cmap_add(map, cnt, cnt); 293 } 294 n = p - carray; 295 if (Cflag && n > 1) 296 (void)mergesort(carray, n, sizeof(*carray), charcoll); 297 298 s2.str = argv[1]; 299 s2.state = NORMAL; 300 for (cnt = 0; cnt < n; cnt++) { 301 (void)next(&s2); 302 cmap_add(map, carray[cnt], s2.lastch); 303 /* 304 * Chars taken from s2 can be different this time 305 * due to lack of complex upper/lower processing, 306 * so fill string2 again to not miss some. 307 */ 308 if (sflag) 309 cset_add(squeeze, s2.lastch); 310 } 311 } 312 313 cset_cache(squeeze); 314 cmap_cache(map); 315 316 if (sflag) 317 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 318 if (!Cflag || iswrune(ch)) 319 ch = cmap_lookup(map, ch); 320 if (lastch != ch || !cset_in(squeeze, ch)) { 321 lastch = ch; 322 (void)putwchar(ch); 323 } 324 } 325 else 326 while ((ch = getwchar()) != WEOF) { 327 if (!Cflag || iswrune(ch)) 328 ch = cmap_lookup(map, ch); 329 (void)putwchar(ch); 330 } 331 if (ferror(stdin)) 332 err(1, NULL); 333 exit (0); 334 } 335 336 static struct cset * 337 setup(char *arg, STR *str, int cflag, int Cflag) 338 { 339 struct cset *cs; 340 341 cs = cset_alloc(); 342 if (cs == NULL) 343 err(1, NULL); 344 str->str = arg; 345 while (next(str)) 346 cset_add(cs, str->lastch); 347 if (Cflag) 348 cset_addclass(cs, wctype("rune"), true); 349 if (cflag || Cflag) 350 cset_invert(cs); 351 cset_cache(cs); 352 return (cs); 353 } 354 355 int 356 charcoll(const void *a, const void *b) 357 { 358 static char sa[2], sb[2]; 359 360 sa[0] = *(const int *)a; 361 sb[0] = *(const int *)b; 362 return (strcoll(sa, sb)); 363 } 364 365 static void 366 usage(void) 367 { 368 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 369 "usage: tr [-Ccsu] string1 string2", 370 " tr [-Ccu] -d string1", 371 " tr [-Ccu] -s string1", 372 " tr [-Ccu] -ds string1 string2"); 373 exit(1); 374 } 375