1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Case Larsen. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif /* not lint */ 42 43 #ifndef lint 44 #if 0 45 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 46 #endif 47 static const char rcsid[] = 48 "$FreeBSD$"; 49 #endif /* not lint */ 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <limits.h> 54 #include <locale.h> 55 #include <stdint.h> 56 #define _WITH_GETLINE 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 #include <wchar.h> 62 #include <wctype.h> 63 64 int cflag, dflag, uflag, iflag; 65 int numchars, numfields, repeats; 66 67 FILE *file(const char *, const char *); 68 wchar_t *convert(const char *); 69 int inlcmp(const char *, const char *); 70 void show(FILE *, const char *); 71 wchar_t *skip(wchar_t *); 72 void obsolete(char *[]); 73 static void usage(void); 74 75 int 76 main (int argc, char *argv[]) 77 { 78 wchar_t *tprev, *tthis; 79 FILE *ifp, *ofp; 80 int ch, comp; 81 size_t prevbuflen, thisbuflen, b1; 82 char *prevline, *thisline, *p; 83 const char *ifn; 84 85 (void) setlocale(LC_ALL, ""); 86 87 obsolete(argv); 88 while ((ch = getopt(argc, argv, "cdif:s:u")) != -1) 89 switch (ch) { 90 case 'c': 91 cflag = 1; 92 break; 93 case 'd': 94 dflag = 1; 95 break; 96 case 'i': 97 iflag = 1; 98 break; 99 case 'f': 100 numfields = strtol(optarg, &p, 10); 101 if (numfields < 0 || *p) 102 errx(1, "illegal field skip value: %s", optarg); 103 break; 104 case 's': 105 numchars = strtol(optarg, &p, 10); 106 if (numchars < 0 || *p) 107 errx(1, "illegal character skip value: %s", optarg); 108 break; 109 case 'u': 110 uflag = 1; 111 break; 112 case '?': 113 default: 114 usage(); 115 } 116 117 argc -= optind; 118 argv += optind; 119 120 /* If no flags are set, default is -d -u. */ 121 if (cflag) { 122 if (dflag || uflag) 123 usage(); 124 } else if (!dflag && !uflag) 125 dflag = uflag = 1; 126 127 if (argc > 2) 128 usage(); 129 130 ifp = stdin; 131 ifn = "stdin"; 132 ofp = stdout; 133 if (argc > 0 && strcmp(argv[0], "-") != 0) 134 ifp = file(ifn = argv[0], "r"); 135 if (argc > 1) 136 ofp = file(argv[1], "w"); 137 138 prevbuflen = thisbuflen = 0; 139 prevline = thisline = NULL; 140 141 if (getline(&prevline, &prevbuflen, ifp) < 0) { 142 if (ferror(ifp)) 143 err(1, "%s", ifn); 144 exit(0); 145 } 146 tprev = convert(prevline); 147 148 if (!cflag && uflag && dflag) 149 show(ofp, prevline); 150 151 tthis = NULL; 152 while (getline(&thisline, &thisbuflen, ifp) >= 0) { 153 if (tthis != NULL) 154 free(tthis); 155 tthis = convert(thisline); 156 157 if (tthis == NULL && tprev == NULL) 158 comp = inlcmp(thisline, prevline); 159 else if (tthis == NULL || tprev == NULL) 160 comp = 1; 161 else 162 comp = wcscoll(tthis, tprev); 163 164 if (comp) { 165 /* If different, print; set previous to new value. */ 166 if (cflag || !dflag || !uflag) 167 show(ofp, prevline); 168 p = prevline; 169 b1 = prevbuflen; 170 prevline = thisline; 171 prevbuflen = thisbuflen; 172 if (tprev != NULL) 173 free(tprev); 174 tprev = tthis; 175 if (!cflag && uflag && dflag) 176 show(ofp, prevline); 177 thisline = p; 178 thisbuflen = b1; 179 tthis = NULL; 180 repeats = 0; 181 } else 182 ++repeats; 183 } 184 if (ferror(ifp)) 185 err(1, "%s", ifn); 186 if (cflag || !dflag || !uflag) 187 show(ofp, prevline); 188 exit(0); 189 } 190 191 wchar_t * 192 convert(const char *str) 193 { 194 size_t n; 195 wchar_t *buf, *ret, *p; 196 197 if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) 198 return (NULL); 199 if (SIZE_MAX / sizeof(*buf) < n + 1) 200 errx(1, "conversion buffer length overflow"); 201 if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) 202 err(1, "malloc"); 203 if (mbstowcs(buf, str, n + 1) != n) 204 errx(1, "internal mbstowcs() error"); 205 /* The last line may not end with \n. */ 206 if (n > 0 && buf[n - 1] == L'\n') 207 buf[n - 1] = L'\0'; 208 209 /* If requested get the chosen fields + character offsets. */ 210 if (numfields || numchars) { 211 if ((ret = wcsdup(skip(buf))) == NULL) 212 err(1, "wcsdup"); 213 free(buf); 214 } else 215 ret = buf; 216 217 if (iflag) { 218 for (p = ret; *p != L'\0'; p++) 219 *p = towlower(*p); 220 } 221 222 return (ret); 223 } 224 225 int 226 inlcmp(const char *s1, const char *s2) 227 { 228 int c1, c2; 229 230 while (*s1 == *s2++) 231 if (*s1++ == '\0') 232 return (0); 233 c1 = (unsigned char)*s1; 234 c2 = (unsigned char)*(s2 - 1); 235 /* The last line may not end with \n. */ 236 if (c1 == '\n') 237 c1 = '\0'; 238 if (c2 == '\n') 239 c2 = '\0'; 240 return (c1 - c2); 241 } 242 243 /* 244 * show -- 245 * Output a line depending on the flags and number of repetitions 246 * of the line. 247 */ 248 void 249 show(FILE *ofp, const char *str) 250 { 251 252 if (cflag) 253 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 254 if ((dflag && repeats) || (uflag && !repeats)) 255 (void)fprintf(ofp, "%s", str); 256 } 257 258 wchar_t * 259 skip(wchar_t *str) 260 { 261 int nchars, nfields; 262 263 for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { 264 while (iswblank(*str)) 265 str++; 266 while (*str != L'\0' && !iswblank(*str)) 267 str++; 268 } 269 for (nchars = numchars; nchars-- && *str != L'\0'; ++str) 270 ; 271 return(str); 272 } 273 274 FILE * 275 file(const char *name, const char *mode) 276 { 277 FILE *fp; 278 279 if ((fp = fopen(name, mode)) == NULL) 280 err(1, "%s", name); 281 return(fp); 282 } 283 284 void 285 obsolete(char *argv[]) 286 { 287 int len; 288 char *ap, *p, *start; 289 290 while ((ap = *++argv)) { 291 /* Return if "--" or not an option of any form. */ 292 if (ap[0] != '-') { 293 if (ap[0] != '+') 294 return; 295 } else if (ap[1] == '-') 296 return; 297 if (!isdigit((unsigned char)ap[1])) 298 continue; 299 /* 300 * Digit signifies an old-style option. Malloc space for dash, 301 * new option and argument. 302 */ 303 len = strlen(ap); 304 if ((start = p = malloc(len + 3)) == NULL) 305 err(1, "malloc"); 306 *p++ = '-'; 307 *p++ = ap[0] == '+' ? 's' : 'f'; 308 (void)strcpy(p, ap + 1); 309 *argv = start; 310 } 311 } 312 313 static void 314 usage(void) 315 { 316 (void)fprintf(stderr, 317 "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); 318 exit(1); 319 } 320