1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Case Larsen. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif /* not lint */ 42 43 #ifndef lint 44 #if 0 45 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 46 #endif 47 static const char rcsid[] = 48 "$FreeBSD$"; 49 #endif /* not lint */ 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <limits.h> 54 #include <locale.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <wchar.h> 60 #include <wctype.h> 61 62 #define MAXLINELEN (LINE_MAX + 1) 63 64 int cflag, dflag, uflag; 65 int numchars, numfields, repeats; 66 67 FILE *file(const char *, const char *); 68 wchar_t *getline(wchar_t *, size_t *, FILE *); 69 void show(FILE *, wchar_t *); 70 wchar_t *skip(wchar_t *); 71 void obsolete(char *[]); 72 static void usage(void); 73 int wcsicoll(wchar_t *, wchar_t *); 74 75 int 76 main (int argc, char *argv[]) 77 { 78 wchar_t *t1, *t2; 79 FILE *ifp, *ofp; 80 int ch, b1; 81 size_t prevbuflen, thisbuflen; 82 wchar_t *prevline, *thisline; 83 char *p; 84 const char *ifn; 85 int iflag = 0, comp; 86 87 (void) setlocale(LC_ALL, ""); 88 89 obsolete(argv); 90 while ((ch = getopt(argc, argv, "cdif:s:u")) != -1) 91 switch (ch) { 92 case 'c': 93 cflag = 1; 94 break; 95 case 'd': 96 dflag = 1; 97 break; 98 case 'i': 99 iflag = 1; 100 break; 101 case 'f': 102 numfields = strtol(optarg, &p, 10); 103 if (numfields < 0 || *p) 104 errx(1, "illegal field skip value: %s", optarg); 105 break; 106 case 's': 107 numchars = strtol(optarg, &p, 10); 108 if (numchars < 0 || *p) 109 errx(1, "illegal character skip value: %s", optarg); 110 break; 111 case 'u': 112 uflag = 1; 113 break; 114 case '?': 115 default: 116 usage(); 117 } 118 119 argc -= optind; 120 argv += optind; 121 122 /* If no flags are set, default is -d -u. */ 123 if (cflag) { 124 if (dflag || uflag) 125 usage(); 126 } else if (!dflag && !uflag) 127 dflag = uflag = 1; 128 129 if (argc > 2) 130 usage(); 131 132 ifp = stdin; 133 ifn = "stdin"; 134 ofp = stdout; 135 if (argc > 0 && strcmp(argv[0], "-") != 0) 136 ifp = file(ifn = argv[0], "r"); 137 if (argc > 1) 138 ofp = file(argv[1], "w"); 139 140 prevbuflen = MAXLINELEN; 141 thisbuflen = MAXLINELEN; 142 prevline = malloc(prevbuflen * sizeof(*prevline)); 143 thisline = malloc(thisbuflen * sizeof(*thisline)); 144 if (prevline == NULL || thisline == NULL) 145 err(1, "malloc"); 146 147 if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) { 148 if (ferror(ifp)) 149 err(1, "%s", ifn); 150 exit(0); 151 } 152 if (!cflag && uflag && dflag) 153 show(ofp, prevline); 154 155 while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) { 156 /* If requested get the chosen fields + character offsets. */ 157 if (numfields || numchars) { 158 t1 = skip(thisline); 159 t2 = skip(prevline); 160 } else { 161 t1 = thisline; 162 t2 = prevline; 163 } 164 165 /* If different, print; set previous to new value. */ 166 if (iflag) 167 comp = wcsicoll(t1, t2); 168 else 169 comp = wcscoll(t1, t2); 170 171 if (comp) { 172 if (cflag || !dflag || !uflag) 173 show(ofp, prevline); 174 t1 = prevline; 175 b1 = prevbuflen; 176 prevline = thisline; 177 prevbuflen = thisbuflen; 178 if (!cflag && uflag && dflag) 179 show(ofp, prevline); 180 thisline = t1; 181 thisbuflen = b1; 182 repeats = 0; 183 } else 184 ++repeats; 185 } 186 if (ferror(ifp)) 187 err(1, "%s", ifn); 188 if (cflag || !dflag || !uflag) 189 show(ofp, prevline); 190 exit(0); 191 } 192 193 wchar_t * 194 getline(wchar_t *buf, size_t *buflen, FILE *fp) 195 { 196 size_t bufpos; 197 wint_t ch; 198 199 bufpos = 0; 200 while ((ch = getwc(fp)) != WEOF && ch != '\n') { 201 if (bufpos + 2 >= *buflen) { 202 *buflen = *buflen * 2; 203 buf = reallocf(buf, *buflen * sizeof(*buf)); 204 if (buf == NULL) 205 return (NULL); 206 } 207 buf[bufpos++] = ch; 208 } 209 if (bufpos + 1 != *buflen) 210 buf[bufpos] = '\0'; 211 212 return (bufpos != 0 || ch == '\n' ? buf : NULL); 213 } 214 215 /* 216 * show -- 217 * Output a line depending on the flags and number of repetitions 218 * of the line. 219 */ 220 void 221 show(FILE *ofp, wchar_t *str) 222 { 223 224 if (cflag) 225 (void)fprintf(ofp, "%4d %ls\n", repeats + 1, str); 226 if ((dflag && repeats) || (uflag && !repeats)) 227 (void)fprintf(ofp, "%ls\n", str); 228 } 229 230 wchar_t * 231 skip(wchar_t *str) 232 { 233 int nchars, nfields; 234 235 for (nfields = 0; *str != '\0' && nfields++ != numfields; ) { 236 while (iswblank(*str)) 237 str++; 238 while (*str != '\0' && !iswblank(*str)) 239 str++; 240 } 241 for (nchars = numchars; nchars-- && *str; ++str); 242 return(str); 243 } 244 245 FILE * 246 file(const char *name, const char *mode) 247 { 248 FILE *fp; 249 250 if ((fp = fopen(name, mode)) == NULL) 251 err(1, "%s", name); 252 return(fp); 253 } 254 255 void 256 obsolete(char *argv[]) 257 { 258 int len; 259 char *ap, *p, *start; 260 261 while ((ap = *++argv)) { 262 /* Return if "--" or not an option of any form. */ 263 if (ap[0] != '-') { 264 if (ap[0] != '+') 265 return; 266 } else if (ap[1] == '-') 267 return; 268 if (!isdigit((unsigned char)ap[1])) 269 continue; 270 /* 271 * Digit signifies an old-style option. Malloc space for dash, 272 * new option and argument. 273 */ 274 len = strlen(ap); 275 if ((start = p = malloc(len + 3)) == NULL) 276 err(1, "malloc"); 277 *p++ = '-'; 278 *p++ = ap[0] == '+' ? 's' : 'f'; 279 (void)strcpy(p, ap + 1); 280 *argv = start; 281 } 282 } 283 284 static void 285 usage(void) 286 { 287 (void)fprintf(stderr, 288 "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); 289 exit(1); 290 } 291 292 static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0; 293 static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL; 294 295 int 296 wcsicoll(wchar_t *s1, wchar_t *s2) 297 { 298 wchar_t *p; 299 size_t l1, l2; 300 size_t new_l1_buflen, new_l2_buflen; 301 302 l1 = wcslen(s1) + 1; 303 l2 = wcslen(s2) + 1; 304 new_l1_buflen = wcsicoll_l1_buflen; 305 new_l2_buflen = wcsicoll_l2_buflen; 306 while (new_l1_buflen < l1) { 307 if (new_l1_buflen == 0) 308 new_l1_buflen = MAXLINELEN; 309 else 310 new_l1_buflen *= 2; 311 } 312 while (new_l2_buflen < l2) { 313 if (new_l2_buflen == 0) 314 new_l2_buflen = MAXLINELEN; 315 else 316 new_l2_buflen *= 2; 317 } 318 if (new_l1_buflen > wcsicoll_l1_buflen) { 319 wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf)); 320 if (wcsicoll_l1_buf == NULL) 321 err(1, "reallocf"); 322 wcsicoll_l1_buflen = new_l1_buflen; 323 } 324 if (new_l2_buflen > wcsicoll_l2_buflen) { 325 wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf)); 326 if (wcsicoll_l2_buf == NULL) 327 err(1, "reallocf"); 328 wcsicoll_l2_buflen = new_l2_buflen; 329 } 330 331 for (p = wcsicoll_l1_buf; *s1; s1++) 332 *p++ = towlower(*s1); 333 *p = '\0'; 334 for (p = wcsicoll_l2_buf; *s2; s2++) 335 *p++ = towlower(*s2); 336 *p = '\0'; 337 338 return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf)); 339 } 340