1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 32 __FBSDID("$FreeBSD$"); 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1988, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 #ifndef lint 41 static const char sccsid[] = "@(#)tr.c 8.2 (Berkeley) 5/4/95"; 42 #endif 43 44 #include <sys/types.h> 45 46 #include <ctype.h> 47 #include <err.h> 48 #include <limits.h> 49 #include <locale.h> 50 #include <stdint.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <unistd.h> 55 #include <wchar.h> 56 #include <wctype.h> 57 58 #include "cmap.h" 59 #include "cset.h" 60 #include "extern.h" 61 62 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 63 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 64 65 static struct cset *setup(char *, STR *, int, int); 66 static void usage(void); 67 68 int 69 main(int argc, char **argv) 70 { 71 struct cmap *map; 72 struct cset *delete, *squeeze; 73 int Cflag, cflag, dflag, sflag, isstring2; 74 wint_t ch, cnt, lastch; 75 76 (void)setlocale(LC_ALL, ""); 77 78 Cflag = cflag = dflag = sflag = 0; 79 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 80 switch((char)ch) { 81 case 'C': 82 Cflag = 1; 83 cflag = 0; 84 break; 85 case 'c': 86 cflag = 1; 87 Cflag = 0; 88 break; 89 case 'd': 90 dflag = 1; 91 break; 92 case 's': 93 sflag = 1; 94 break; 95 case 'u': 96 setbuf(stdout, (char *)NULL); 97 break; 98 case '?': 99 default: 100 usage(); 101 } 102 argc -= optind; 103 argv += optind; 104 105 switch(argc) { 106 case 0: 107 default: 108 usage(); 109 /* NOTREACHED */ 110 case 1: 111 isstring2 = 0; 112 break; 113 case 2: 114 isstring2 = 1; 115 break; 116 } 117 118 /* 119 * tr -ds [-Cc] string1 string2 120 * Delete all characters (or complemented characters) in string1. 121 * Squeeze all characters in string2. 122 */ 123 if (dflag && sflag) { 124 if (!isstring2) 125 usage(); 126 127 delete = setup(argv[0], &s1, cflag, Cflag); 128 squeeze = setup(argv[1], &s2, 0, 0); 129 130 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 131 if (!cset_in(delete, ch) && 132 (lastch != ch || !cset_in(squeeze, ch))) { 133 lastch = ch; 134 (void)putwchar(ch); 135 } 136 if (ferror(stdin)) 137 err(1, NULL); 138 exit(0); 139 } 140 141 /* 142 * tr -d [-Cc] string1 143 * Delete all characters (or complemented characters) in string1. 144 */ 145 if (dflag) { 146 if (isstring2) 147 usage(); 148 149 delete = setup(argv[0], &s1, cflag, Cflag); 150 151 while ((ch = getwchar()) != WEOF) 152 if (!cset_in(delete, ch)) 153 (void)putwchar(ch); 154 if (ferror(stdin)) 155 err(1, NULL); 156 exit(0); 157 } 158 159 /* 160 * tr -s [-Cc] string1 161 * Squeeze all characters (or complemented characters) in string1. 162 */ 163 if (sflag && !isstring2) { 164 squeeze = setup(argv[0], &s1, cflag, Cflag); 165 166 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 167 if (lastch != ch || !cset_in(squeeze, ch)) { 168 lastch = ch; 169 (void)putwchar(ch); 170 } 171 if (ferror(stdin)) 172 err(1, NULL); 173 exit(0); 174 } 175 176 /* 177 * tr [-Ccs] string1 string2 178 * Replace all characters (or complemented characters) in string1 with 179 * the character in the same position in string2. If the -s option is 180 * specified, squeeze all the characters in string2. 181 */ 182 if (!isstring2) 183 usage(); 184 185 map = cmap_alloc(); 186 if (map == NULL) 187 err(1, NULL); 188 squeeze = cset_alloc(); 189 if (squeeze == NULL) 190 err(1, NULL); 191 192 s1.str = argv[0]; 193 194 if (Cflag || cflag) { 195 cmap_default(map, OOBCH); 196 if ((s2.str = strdup(argv[1])) == NULL) 197 errx(1, "strdup(argv[1])"); 198 } else 199 s2.str = argv[1]; 200 201 if (!next(&s2)) 202 errx(1, "empty string2"); 203 204 /* 205 * For -s result will contain only those characters defined 206 * as the second characters in each of the toupper or tolower 207 * pairs. 208 */ 209 210 /* If string2 runs out of characters, use the last one specified. */ 211 while (next(&s1)) { 212 again: 213 if (s1.state == CCLASS_LOWER && 214 s2.state == CCLASS_UPPER && 215 s1.cnt == 1 && s2.cnt == 1) { 216 do { 217 ch = towupper(s1.lastch); 218 cmap_add(map, s1.lastch, ch); 219 if (sflag && iswupper(ch)) 220 cset_add(squeeze, ch); 221 if (!next(&s1)) 222 goto endloop; 223 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 224 /* skip upper set */ 225 do { 226 if (!next(&s2)) 227 break; 228 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 229 goto again; 230 } else if (s1.state == CCLASS_UPPER && 231 s2.state == CCLASS_LOWER && 232 s1.cnt == 1 && s2.cnt == 1) { 233 do { 234 ch = towlower(s1.lastch); 235 cmap_add(map, s1.lastch, ch); 236 if (sflag && iswlower(ch)) 237 cset_add(squeeze, ch); 238 if (!next(&s1)) 239 goto endloop; 240 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 241 /* skip lower set */ 242 do { 243 if (!next(&s2)) 244 break; 245 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 246 goto again; 247 } else { 248 cmap_add(map, s1.lastch, s2.lastch); 249 if (sflag) 250 cset_add(squeeze, s2.lastch); 251 } 252 (void)next(&s2); 253 } 254 endloop: 255 if (cflag || Cflag) { 256 /* 257 * This is somewhat tricky: since the character set is 258 * potentially huge, we need to avoid allocating a map 259 * entry for every character. Our strategy is to set the 260 * default mapping to the last character of string #2 261 * (= the one that gets automatically repeated), then to 262 * add back identity mappings for characters that should 263 * remain unchanged. We don't waste space on identity mappings 264 * for non-characters with the -C option; those are simulated 265 * in the I/O loop. 266 */ 267 s2.str = argv[1]; 268 s2.state = NORMAL; 269 for (cnt = 0; cnt <= WINT_MAX; cnt++) { 270 if (Cflag && !iswrune(cnt)) 271 continue; 272 if (cmap_lookup(map, cnt) == OOBCH) { 273 if (next(&s2)) { 274 cmap_add(map, cnt, s2.lastch); 275 if (sflag) 276 cset_add(squeeze, s2.lastch); 277 } 278 } else 279 cmap_add(map, cnt, cnt); 280 if ((s2.state == EOS || s2.state == INFINITE) && 281 cnt >= cmap_max(map)) 282 break; 283 } 284 cmap_default(map, s2.lastch); 285 } 286 287 cset_cache(squeeze); 288 cmap_cache(map); 289 290 if (sflag) 291 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 292 if (!Cflag || iswrune(ch)) 293 ch = cmap_lookup(map, ch); 294 if (lastch != ch || !cset_in(squeeze, ch)) { 295 lastch = ch; 296 (void)putwchar(ch); 297 } 298 } 299 else 300 while ((ch = getwchar()) != WEOF) { 301 if (!Cflag || iswrune(ch)) 302 ch = cmap_lookup(map, ch); 303 (void)putwchar(ch); 304 } 305 if (ferror(stdin)) 306 err(1, NULL); 307 exit (0); 308 } 309 310 static struct cset * 311 setup(char *arg, STR *str, int cflag, int Cflag) 312 { 313 struct cset *cs; 314 315 cs = cset_alloc(); 316 if (cs == NULL) 317 err(1, NULL); 318 str->str = arg; 319 while (next(str)) 320 cset_add(cs, str->lastch); 321 if (Cflag) 322 cset_addclass(cs, wctype("rune"), true); 323 if (cflag || Cflag) 324 cset_invert(cs); 325 cset_cache(cs); 326 return (cs); 327 } 328 329 static void 330 usage(void) 331 { 332 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 333 "usage: tr [-Ccsu] string1 string2", 334 " tr [-Ccu] -d string1", 335 " tr [-Ccu] -s string1", 336 " tr [-Ccu] -ds string1 string2"); 337 exit(1); 338 } 339