1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 36 __FBSDID("$FreeBSD$"); 37 38 #ifndef lint 39 static const char copyright[] = 40 "@(#) Copyright (c) 1988, 1993\n\ 41 The Regents of the University of California. All rights reserved.\n"; 42 #endif 43 44 #ifndef lint 45 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 46 #endif 47 48 #include <sys/types.h> 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <limits.h> 53 #include <locale.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <unistd.h> 58 #include <wchar.h> 59 #include <wctype.h> 60 61 #include "cmap.h" 62 #include "cset.h" 63 #include "extern.h" 64 65 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 66 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 67 68 static struct cset *setup(char *, STR *, int, int); 69 static void usage(void); 70 71 int 72 main(int argc, char **argv) 73 { 74 static int carray[NCHARS_SB]; 75 struct cmap *map; 76 struct cset *delete, *squeeze; 77 int n, *p; 78 int Cflag, cflag, dflag, sflag, isstring2; 79 wint_t ch, cnt, lastch; 80 81 (void)setlocale(LC_ALL, ""); 82 83 Cflag = cflag = dflag = sflag = 0; 84 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 85 switch((char)ch) { 86 case 'C': 87 Cflag = 1; 88 cflag = 0; 89 break; 90 case 'c': 91 cflag = 1; 92 Cflag = 0; 93 break; 94 case 'd': 95 dflag = 1; 96 break; 97 case 's': 98 sflag = 1; 99 break; 100 case 'u': 101 setbuf(stdout, (char *)NULL); 102 break; 103 case '?': 104 default: 105 usage(); 106 } 107 argc -= optind; 108 argv += optind; 109 110 switch(argc) { 111 case 0: 112 default: 113 usage(); 114 /* NOTREACHED */ 115 case 1: 116 isstring2 = 0; 117 break; 118 case 2: 119 isstring2 = 1; 120 break; 121 } 122 123 /* 124 * tr -ds [-Cc] string1 string2 125 * Delete all characters (or complemented characters) in string1. 126 * Squeeze all characters in string2. 127 */ 128 if (dflag && sflag) { 129 if (!isstring2) 130 usage(); 131 132 delete = setup(argv[0], &s1, cflag, Cflag); 133 squeeze = setup(argv[1], &s2, 0, 0); 134 135 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 136 if (!cset_in(delete, ch) && 137 (lastch != ch || !cset_in(squeeze, ch))) { 138 lastch = ch; 139 (void)putwchar(ch); 140 } 141 if (ferror(stdin)) 142 err(1, NULL); 143 exit(0); 144 } 145 146 /* 147 * tr -d [-Cc] string1 148 * Delete all characters (or complemented characters) in string1. 149 */ 150 if (dflag) { 151 if (isstring2) 152 usage(); 153 154 delete = setup(argv[0], &s1, cflag, Cflag); 155 156 while ((ch = getwchar()) != WEOF) 157 if (!cset_in(delete, ch)) 158 (void)putwchar(ch); 159 if (ferror(stdin)) 160 err(1, NULL); 161 exit(0); 162 } 163 164 /* 165 * tr -s [-Cc] string1 166 * Squeeze all characters (or complemented characters) in string1. 167 */ 168 if (sflag && !isstring2) { 169 squeeze = setup(argv[0], &s1, cflag, Cflag); 170 171 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 172 if (lastch != ch || !cset_in(squeeze, ch)) { 173 lastch = ch; 174 (void)putwchar(ch); 175 } 176 if (ferror(stdin)) 177 err(1, NULL); 178 exit(0); 179 } 180 181 /* 182 * tr [-Ccs] string1 string2 183 * Replace all characters (or complemented characters) in string1 with 184 * the character in the same position in string2. If the -s option is 185 * specified, squeeze all the characters in string2. 186 */ 187 if (!isstring2) 188 usage(); 189 190 map = cmap_alloc(); 191 if (map == NULL) 192 err(1, NULL); 193 squeeze = cset_alloc(); 194 if (squeeze == NULL) 195 err(1, NULL); 196 197 s1.str = argv[0]; 198 199 if (Cflag || cflag) { 200 cmap_default(map, OOBCH); 201 if ((s2.str = strdup(argv[1])) == NULL) 202 errx(1, "strdup(argv[1])"); 203 } else 204 s2.str = argv[1]; 205 206 if (!next(&s2)) 207 errx(1, "empty string2"); 208 209 /* 210 * For -s result will contain only those characters defined 211 * as the second characters in each of the toupper or tolower 212 * pairs. 213 */ 214 215 /* If string2 runs out of characters, use the last one specified. */ 216 while (next(&s1)) { 217 again: 218 if (s1.state == CCLASS_LOWER && 219 s2.state == CCLASS_UPPER && 220 s1.cnt == 1 && s2.cnt == 1) { 221 do { 222 ch = towupper(s1.lastch); 223 cmap_add(map, s1.lastch, ch); 224 if (sflag && iswupper(ch)) 225 cset_add(squeeze, ch); 226 if (!next(&s1)) 227 goto endloop; 228 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 229 /* skip upper set */ 230 do { 231 if (!next(&s2)) 232 break; 233 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 234 goto again; 235 } else if (s1.state == CCLASS_UPPER && 236 s2.state == CCLASS_LOWER && 237 s1.cnt == 1 && s2.cnt == 1) { 238 do { 239 ch = towlower(s1.lastch); 240 cmap_add(map, s1.lastch, ch); 241 if (sflag && iswlower(ch)) 242 cset_add(squeeze, ch); 243 if (!next(&s1)) 244 goto endloop; 245 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 246 /* skip lower set */ 247 do { 248 if (!next(&s2)) 249 break; 250 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 251 goto again; 252 } else { 253 cmap_add(map, s1.lastch, s2.lastch); 254 if (sflag) 255 cset_add(squeeze, s2.lastch); 256 } 257 (void)next(&s2); 258 } 259 endloop: 260 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 261 /* 262 * This is somewhat tricky: since the character set is 263 * potentially huge, we need to avoid allocating a map 264 * entry for every character. Our strategy is to set the 265 * default mapping to the last character of string #2 266 * (= the one that gets automatically repeated), then to 267 * add back identity mappings for characters that should 268 * remain unchanged. We don't waste space on identity mappings 269 * for non-characters with the -C option; those are simulated 270 * in the I/O loop. 271 */ 272 s2.str = argv[1]; 273 s2.state = NORMAL; 274 for (cnt = 0; cnt < WCHAR_MAX; cnt++) { 275 if (Cflag && !iswrune(cnt)) 276 continue; 277 if (cmap_lookup(map, cnt) == OOBCH) { 278 if (next(&s2)) 279 cmap_add(map, cnt, s2.lastch); 280 if (sflag) 281 cset_add(squeeze, s2.lastch); 282 } else 283 cmap_add(map, cnt, cnt); 284 if ((s2.state == EOS || s2.state == INFINITE) && 285 cnt >= cmap_max(map)) 286 break; 287 } 288 cmap_default(map, s2.lastch); 289 } else if (Cflag) { 290 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 291 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 292 *p++ = cnt; 293 else 294 cmap_add(map, cnt, cnt); 295 } 296 n = p - carray; 297 if (Cflag && n > 1) 298 (void)mergesort(carray, n, sizeof(*carray), charcoll); 299 300 s2.str = argv[1]; 301 s2.state = NORMAL; 302 for (cnt = 0; cnt < n; cnt++) { 303 (void)next(&s2); 304 cmap_add(map, carray[cnt], s2.lastch); 305 /* 306 * Chars taken from s2 can be different this time 307 * due to lack of complex upper/lower processing, 308 * so fill string2 again to not miss some. 309 */ 310 if (sflag) 311 cset_add(squeeze, s2.lastch); 312 } 313 } 314 315 cset_cache(squeeze); 316 cmap_cache(map); 317 318 if (sflag) 319 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 320 if (!Cflag || iswrune(ch)) 321 ch = cmap_lookup(map, ch); 322 if (lastch != ch || !cset_in(squeeze, ch)) { 323 lastch = ch; 324 (void)putwchar(ch); 325 } 326 } 327 else 328 while ((ch = getwchar()) != WEOF) { 329 if (!Cflag || iswrune(ch)) 330 ch = cmap_lookup(map, ch); 331 (void)putwchar(ch); 332 } 333 if (ferror(stdin)) 334 err(1, NULL); 335 exit (0); 336 } 337 338 static struct cset * 339 setup(char *arg, STR *str, int cflag, int Cflag) 340 { 341 struct cset *cs; 342 343 cs = cset_alloc(); 344 if (cs == NULL) 345 err(1, NULL); 346 str->str = arg; 347 while (next(str)) 348 cset_add(cs, str->lastch); 349 if (Cflag) 350 cset_addclass(cs, wctype("rune"), true); 351 if (cflag || Cflag) 352 cset_invert(cs); 353 cset_cache(cs); 354 return (cs); 355 } 356 357 int 358 charcoll(const void *a, const void *b) 359 { 360 static char sa[2], sb[2]; 361 362 sa[0] = *(const int *)a; 363 sb[0] = *(const int *)b; 364 return (strcoll(sa, sb)); 365 } 366 367 static void 368 usage(void) 369 { 370 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 371 "usage: tr [-Ccsu] string1 string2", 372 " tr [-Ccu] -d string1", 373 " tr [-Ccu] -s string1", 374 " tr [-Ccu] -ds string1 string2"); 375 exit(1); 376 } 377