1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 32 __FBSDID("$FreeBSD$"); 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1988, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 #ifndef lint 41 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 42 #endif 43 44 #include <sys/types.h> 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 #include <wchar.h> 55 #include <wctype.h> 56 57 #include "cmap.h" 58 #include "cset.h" 59 #include "extern.h" 60 61 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 62 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 63 64 static struct cset *setup(char *, STR *, int, int); 65 static void usage(void); 66 67 int 68 main(int argc, char **argv) 69 { 70 static int carray[NCHARS_SB]; 71 struct cmap *map; 72 struct cset *delete, *squeeze; 73 int n, *p; 74 int Cflag, cflag, dflag, sflag, isstring2; 75 wint_t ch, cnt, lastch; 76 77 (void)setlocale(LC_ALL, ""); 78 79 Cflag = cflag = dflag = sflag = 0; 80 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 81 switch((char)ch) { 82 case 'C': 83 Cflag = 1; 84 cflag = 0; 85 break; 86 case 'c': 87 cflag = 1; 88 Cflag = 0; 89 break; 90 case 'd': 91 dflag = 1; 92 break; 93 case 's': 94 sflag = 1; 95 break; 96 case 'u': 97 setbuf(stdout, (char *)NULL); 98 break; 99 case '?': 100 default: 101 usage(); 102 } 103 argc -= optind; 104 argv += optind; 105 106 switch(argc) { 107 case 0: 108 default: 109 usage(); 110 /* NOTREACHED */ 111 case 1: 112 isstring2 = 0; 113 break; 114 case 2: 115 isstring2 = 1; 116 break; 117 } 118 119 /* 120 * tr -ds [-Cc] string1 string2 121 * Delete all characters (or complemented characters) in string1. 122 * Squeeze all characters in string2. 123 */ 124 if (dflag && sflag) { 125 if (!isstring2) 126 usage(); 127 128 delete = setup(argv[0], &s1, cflag, Cflag); 129 squeeze = setup(argv[1], &s2, 0, 0); 130 131 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 132 if (!cset_in(delete, ch) && 133 (lastch != ch || !cset_in(squeeze, ch))) { 134 lastch = ch; 135 (void)putwchar(ch); 136 } 137 if (ferror(stdin)) 138 err(1, NULL); 139 exit(0); 140 } 141 142 /* 143 * tr -d [-Cc] string1 144 * Delete all characters (or complemented characters) in string1. 145 */ 146 if (dflag) { 147 if (isstring2) 148 usage(); 149 150 delete = setup(argv[0], &s1, cflag, Cflag); 151 152 while ((ch = getwchar()) != WEOF) 153 if (!cset_in(delete, ch)) 154 (void)putwchar(ch); 155 if (ferror(stdin)) 156 err(1, NULL); 157 exit(0); 158 } 159 160 /* 161 * tr -s [-Cc] string1 162 * Squeeze all characters (or complemented characters) in string1. 163 */ 164 if (sflag && !isstring2) { 165 squeeze = setup(argv[0], &s1, cflag, Cflag); 166 167 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 168 if (lastch != ch || !cset_in(squeeze, ch)) { 169 lastch = ch; 170 (void)putwchar(ch); 171 } 172 if (ferror(stdin)) 173 err(1, NULL); 174 exit(0); 175 } 176 177 /* 178 * tr [-Ccs] string1 string2 179 * Replace all characters (or complemented characters) in string1 with 180 * the character in the same position in string2. If the -s option is 181 * specified, squeeze all the characters in string2. 182 */ 183 if (!isstring2) 184 usage(); 185 186 map = cmap_alloc(); 187 if (map == NULL) 188 err(1, NULL); 189 squeeze = cset_alloc(); 190 if (squeeze == NULL) 191 err(1, NULL); 192 193 s1.str = argv[0]; 194 195 if (Cflag || cflag) { 196 cmap_default(map, OOBCH); 197 if ((s2.str = strdup(argv[1])) == NULL) 198 errx(1, "strdup(argv[1])"); 199 } else 200 s2.str = argv[1]; 201 202 if (!next(&s2)) 203 errx(1, "empty string2"); 204 205 /* 206 * For -s result will contain only those characters defined 207 * as the second characters in each of the toupper or tolower 208 * pairs. 209 */ 210 211 /* If string2 runs out of characters, use the last one specified. */ 212 while (next(&s1)) { 213 again: 214 if (s1.state == CCLASS_LOWER && 215 s2.state == CCLASS_UPPER && 216 s1.cnt == 1 && s2.cnt == 1) { 217 do { 218 ch = towupper(s1.lastch); 219 cmap_add(map, s1.lastch, ch); 220 if (sflag && iswupper(ch)) 221 cset_add(squeeze, ch); 222 if (!next(&s1)) 223 goto endloop; 224 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 225 /* skip upper set */ 226 do { 227 if (!next(&s2)) 228 break; 229 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 230 goto again; 231 } else if (s1.state == CCLASS_UPPER && 232 s2.state == CCLASS_LOWER && 233 s1.cnt == 1 && s2.cnt == 1) { 234 do { 235 ch = towlower(s1.lastch); 236 cmap_add(map, s1.lastch, ch); 237 if (sflag && iswlower(ch)) 238 cset_add(squeeze, ch); 239 if (!next(&s1)) 240 goto endloop; 241 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 242 /* skip lower set */ 243 do { 244 if (!next(&s2)) 245 break; 246 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 247 goto again; 248 } else { 249 cmap_add(map, s1.lastch, s2.lastch); 250 if (sflag) 251 cset_add(squeeze, s2.lastch); 252 } 253 (void)next(&s2); 254 } 255 endloop: 256 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 257 /* 258 * This is somewhat tricky: since the character set is 259 * potentially huge, we need to avoid allocating a map 260 * entry for every character. Our strategy is to set the 261 * default mapping to the last character of string #2 262 * (= the one that gets automatically repeated), then to 263 * add back identity mappings for characters that should 264 * remain unchanged. We don't waste space on identity mappings 265 * for non-characters with the -C option; those are simulated 266 * in the I/O loop. 267 */ 268 s2.str = argv[1]; 269 s2.state = NORMAL; 270 for (cnt = 0; cnt < WCHAR_MAX; cnt++) { 271 if (Cflag && !iswrune(cnt)) 272 continue; 273 if (cmap_lookup(map, cnt) == OOBCH) { 274 if (next(&s2)) 275 cmap_add(map, cnt, s2.lastch); 276 if (sflag) 277 cset_add(squeeze, s2.lastch); 278 } else 279 cmap_add(map, cnt, cnt); 280 if ((s2.state == EOS || s2.state == INFINITE) && 281 cnt >= cmap_max(map)) 282 break; 283 } 284 cmap_default(map, s2.lastch); 285 } else if (Cflag) { 286 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 287 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 288 *p++ = cnt; 289 else 290 cmap_add(map, cnt, cnt); 291 } 292 n = p - carray; 293 if (Cflag && n > 1) 294 (void)mergesort(carray, n, sizeof(*carray), charcoll); 295 296 s2.str = argv[1]; 297 s2.state = NORMAL; 298 for (cnt = 0; cnt < n; cnt++) { 299 (void)next(&s2); 300 cmap_add(map, carray[cnt], s2.lastch); 301 /* 302 * Chars taken from s2 can be different this time 303 * due to lack of complex upper/lower processing, 304 * so fill string2 again to not miss some. 305 */ 306 if (sflag) 307 cset_add(squeeze, s2.lastch); 308 } 309 } 310 311 cset_cache(squeeze); 312 cmap_cache(map); 313 314 if (sflag) 315 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 316 if (!Cflag || iswrune(ch)) 317 ch = cmap_lookup(map, ch); 318 if (lastch != ch || !cset_in(squeeze, ch)) { 319 lastch = ch; 320 (void)putwchar(ch); 321 } 322 } 323 else 324 while ((ch = getwchar()) != WEOF) { 325 if (!Cflag || iswrune(ch)) 326 ch = cmap_lookup(map, ch); 327 (void)putwchar(ch); 328 } 329 if (ferror(stdin)) 330 err(1, NULL); 331 exit (0); 332 } 333 334 static struct cset * 335 setup(char *arg, STR *str, int cflag, int Cflag) 336 { 337 struct cset *cs; 338 339 cs = cset_alloc(); 340 if (cs == NULL) 341 err(1, NULL); 342 str->str = arg; 343 while (next(str)) 344 cset_add(cs, str->lastch); 345 if (Cflag) 346 cset_addclass(cs, wctype("rune"), true); 347 if (cflag || Cflag) 348 cset_invert(cs); 349 cset_cache(cs); 350 return (cs); 351 } 352 353 int 354 charcoll(const void *a, const void *b) 355 { 356 static char sa[2], sb[2]; 357 358 sa[0] = *(const int *)a; 359 sb[0] = *(const int *)b; 360 return (strcoll(sa, sb)); 361 } 362 363 static void 364 usage(void) 365 { 366 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 367 "usage: tr [-Ccsu] string1 string2", 368 " tr [-Ccu] -d string1", 369 " tr [-Ccu] -s string1", 370 " tr [-Ccu] -ds string1 string2"); 371 exit(1); 372 } 373