/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95";
#endif
static const char rcsid[] =
  "$FreeBSD$";
#endif /* not lint */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * POSIX permits <limits.h> to omit LINE_MAX; fall back to the guaranteed
 * minimum (_POSIX2_LINE_MAX) so the buffer size is always defined.
 */
#ifndef LINE_MAX
#define	LINE_MAX	2048
#endif

/* Size of each line buffer; longer input lines are truncated on read. */
#define	MAXLINELEN	(LINE_MAX + 1)

int cflag, dflag, uflag;		/* -c, -d and -u were given */
int numchars, numfields, repeats;	/* -s value, -f value, repeat count */

FILE	*file(const char *, const char *);
char	*uniq_getline(char *, size_t, FILE *);
void	 show(FILE *, const char *);
char	*skip(char *);
void	 obsolete(char *[]);
static int	 parse_skip(const char *);
static void	 usage(void);
int	 stricoll(const char *, const char *);

/*
 * main --
 *	Parse the options, open the optional input and output files and
 *	filter adjacent duplicate lines from input to output.
 */
int
main(int argc, char *argv[])
{
	char *t1, *t2;
	FILE *ifp, *ofp;
	int ch;
	char *prevline, *thisline;
	int iflag = 0, comp;

	(void)setlocale(LC_ALL, "");

	/* Translate historic -N / +N arguments before getopt() sees them. */
	obsolete(argv);
	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
		switch (ch) {
		case 'c':
			cflag = 1;
			break;
		case 'd':
			dflag = 1;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'f':
			numfields = parse_skip(optarg);
			if (numfields < 0)
				errx(1, "illegal field skip value: %s", optarg);
			break;
		case 's':
			numchars = parse_skip(optarg);
			if (numchars < 0)
				errx(1, "illegal character skip value: %s", optarg);
			break;
		case 'u':
			uflag = 1;
			break;
		case '?':
		default:
			usage();
		}

	argc -= optind;
	argv += optind;

	/* If no flags are set, default is -d -u. */
	if (cflag) {
		if (dflag || uflag)
			usage();
	} else if (!dflag && !uflag)
		dflag = uflag = 1;

	if (argc > 2)
		usage();

	ifp = stdin;
	ofp = stdout;
	if (argc > 0 && strcmp(argv[0], "-") != 0)
		ifp = file(argv[0], "r");
	if (argc > 1)
		ofp = file(argv[1], "w");

	prevline = malloc(MAXLINELEN);
	thisline = malloc(MAXLINELEN);
	if (prevline == NULL || thisline == NULL)
		err(1, "malloc");

	/* Empty input: nothing to print. */
	if (uniq_getline(prevline, MAXLINELEN, ifp) == NULL)
		exit(0);
	/*
	 * In the default mode (both -d and -u in effect) every distinct
	 * line is printed as soon as it is first seen.
	 */
	if (!cflag && uflag && dflag)
		show(ofp, prevline);

	while (uniq_getline(thisline, MAXLINELEN, ifp)) {
		/* If requested get the chosen fields + character offsets. */
		if (numfields || numchars) {
			t1 = skip(thisline);
			t2 = skip(prevline);
		} else {
			t1 = thisline;
			t2 = prevline;
		}

		/* If different, print; set previous to new value. */
		if (iflag)
			comp = stricoll(t1, t2);
		else
			comp = strcoll(t1, t2);

		if (comp) {
			/*
			 * With -c, or with only one of -d/-u, the finished
			 * group is reported now that its repeat count is
			 * known.
			 */
			if (cflag || !dflag || !uflag)
				show(ofp, prevline);
			/* Swap the two buffers instead of copying. */
			t1 = prevline;
			prevline = thisline;
			if (!cflag && uflag && dflag)
				show(ofp, prevline);
			thisline = t1;
			repeats = 0;
		} else
			++repeats;
	}
	/* Report the final group. */
	if (cflag || !dflag || !uflag)
		show(ofp, prevline);
	/* Do not exit successfully if the output could not be written. */
	if (ferror(ofp) != 0 || fflush(ofp) != 0)
		err(1, "write error");
	exit(0);
}

/*
 * parse_skip --
 *	Convert the argument of -f or -s to a non-negative int.  Returns
 *	-1 if the string is empty, has trailing garbage, is negative or
 *	does not fit in an int.
 */
static int
parse_skip(const char *arg)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 0 || val > INT_MAX)
		return (-1);
	return ((int)val);
}

/*
 * uniq_getline --
 *	Read one line from fp into buf, without the trailing newline.
 *	At most buflen - 2 characters are stored; the remainder of a
 *	longer line is read and discarded.  Returns buf, or NULL when
 *	end-of-file (or a read error) is hit before anything was read.
 *
 *	Named uniq_getline rather than getline so that it does not clash
 *	with the POSIX getline() declared by <stdio.h>.
 */
char *
uniq_getline(char *buf, size_t buflen, FILE *fp)
{
	size_t bufpos;
	int ch;

	bufpos = 0;
	/*
	 * Start with a value that is neither EOF nor '\n' so the tests
	 * below are well defined even if the loop body never runs.
	 */
	ch = 0;
	while (bufpos + 2 < buflen && (ch = getc(fp)) != EOF && ch != '\n')
		buf[bufpos++] = (char)ch;
	if (bufpos < buflen)
		buf[bufpos] = '\0';
	/* Skip whatever is left of a line that did not fit. */
	while (ch != EOF && ch != '\n')
		ch = getc(fp);

	/* An empty line still counts as a line if a newline was seen. */
	return (bufpos != 0 || ch == '\n' ? buf : NULL);
}

/*
 * show --
 *	Output a line depending on the flags and number of repetitions
 *	of the line.  With -c every line, including an empty one, is
 *	written preceded by its occurrence count.
 */
void
show(FILE *ofp, const char *str)
{

	if (cflag)
		(void)fprintf(ofp, "%4d %s\n", repeats + 1, str);
	if ((dflag && repeats) || (uflag && !repeats))
		(void)fprintf(ofp, "%s\n", str);
}

/*
 * skip --
 *	Return a pointer into str past the first numfields fields (each
 *	field being optional blanks followed by non-blanks) and then past
 *	a further numchars characters, stopping early at the end of the
 *	string.
 */
char *
skip(char *str)
{
	int nchars, nfields;

	for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
		while (isblank((unsigned char)*str))
			str++;
		while (*str != '\0' && !isblank((unsigned char)*str))
			str++;
	}
	for (nchars = numchars; nchars-- && *str; ++str)
		;
	return (str);
}

/*
 * file --
 *	Open name with the given fopen() mode; exit with a diagnostic if
 *	the open fails.
 */
FILE *
file(const char *name, const char *mode)
{
	FILE *fp;

	if ((fp = fopen(name, mode)) == NULL)
		err(1, "%s", name);
	return (fp);
}

/*
 * obsolete --
 *	Rewrite historic "-N" and "+N" arguments in place as "-fN" and
 *	"-sN".  Scanning stops at "--" or at the first argument that does
 *	not begin with '-' or '+'.  The replacement strings are allocated
 *	here and stored in argv; they are never freed.
 */
void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}

/*
 * usage --
 *	Print the synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
	exit(1);
}

/*
 * stricoll --
 *	Case-insensitive strcoll(): lower-case both strings into local
 *	buffers and compare the copies.  Each argument must be shorter
 *	than MAXLINELEN, which holds for lines read by uniq_getline().
 */
int
stricoll(const char *s1, const char *s2)
{
	char *p, line1[MAXLINELEN], line2[MAXLINELEN];

	for (p = line1; *s1; s1++)
		*p++ = (char)tolower((unsigned char)*s1);
	*p = '\0';
	for (p = line2; *s2; s2++)
		*p++ = (char)tolower((unsigned char)*s2);
	*p = '\0';
	return (strcoll(line1, line2));
}