1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Case Larsen. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 static const char copyright[] = 37 "@(#) Copyright (c) 1989, 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"; 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 static const char rcsid[] = 46 "$FreeBSD$"; 47 #endif /* not lint */ 48 49 #include <sys/capsicum.h> 50 51 #include <capsicum_helpers.h> 52 #include <ctype.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <getopt.h> 56 #include <limits.h> 57 #include <locale.h> 58 #include <nl_types.h> 59 #include <stdint.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <termios.h> 64 #include <unistd.h> 65 #include <wchar.h> 66 #include <wctype.h> 67 68 static int Dflag, cflag, dflag, uflag, iflag; 69 static int numchars, numfields, repeats; 70 71 /* Dflag values */ 72 #define DF_NONE 0 73 #define DF_NOSEP 1 74 #define DF_PRESEP 2 75 #define DF_POSTSEP 3 76 77 static const struct option long_opts[] = 78 { 79 {"all-repeated",optional_argument, NULL, 'D'}, 80 {"count", no_argument, NULL, 'c'}, 81 {"repeated", no_argument, NULL, 'd'}, 82 {"skip-fields", required_argument, NULL, 'f'}, 83 {"ignore-case", no_argument, NULL, 'i'}, 84 {"skip-chars", required_argument, NULL, 's'}, 85 {"unique", no_argument, NULL, 'u'}, 86 {NULL, no_argument, NULL, 0} 87 }; 88 89 static FILE *file(const char *, const char *); 90 static wchar_t *convert(const char *); 91 static int inlcmp(const char *, const char *); 92 static void show(FILE *, const char *); 93 static wchar_t *skip(wchar_t *); 94 static void obsolete(char *[]); 95 static void usage(void); 96 97 int 98 main (int argc, char *argv[]) 99 { 100 wchar_t *tprev, *tthis; 101 FILE *ifp, *ofp; 102 int ch, comp; 103 size_t prevbuflen, thisbuflen, b1; 104 char *prevline, *thisline, *p; 105 const char *ifn; 106 cap_rights_t rights; 107 108 (void) setlocale(LC_ALL, ""); 109 110 obsolete(argv); 111 while ((ch = getopt_long(argc, argv, "+D::cdif:s:u", long_opts, 112 NULL)) != -1) 113 switch (ch) { 114 case 'D': 115 if (optarg == NULL || strcasecmp(optarg, "none") == 0) 116 Dflag = DF_NOSEP; 117 else if (strcasecmp(optarg, "prepend") == 0) 118 Dflag = DF_PRESEP; 119 else if (strcasecmp(optarg, "separate") == 0) 120 Dflag = DF_POSTSEP; 121 else 122 usage(); 123 break; 124 case 'c': 125 cflag = 1; 126 break; 127 case 'd': 128 dflag = 1; 129 break; 130 case 'i': 131 iflag = 1; 132 break; 133 case 'f': 134 numfields = strtol(optarg, &p, 10); 135 if (numfields < 0 || *p) 136 errx(1, "illegal field skip value: %s", optarg); 137 break; 138 case 's': 139 numchars = strtol(optarg, &p, 10); 140 if (numchars < 0 || *p) 141 errx(1, "illegal character skip value: %s", optarg); 142 break; 143 case 'u': 144 uflag = 1; 145 break; 146 case '?': 147 default: 148 usage(); 149 } 150 151 argc -= optind; 152 argv += optind; 153 154 if (argc > 2) 155 usage(); 156 157 ifp = stdin; 158 ifn = "stdin"; 159 ofp = stdout; 160 if (argc > 0 && strcmp(argv[0], "-") != 0) 161 ifp = file(ifn = argv[0], "r"); 162 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 163 if (caph_rights_limit(fileno(ifp), &rights) < 0) 164 err(1, "unable to limit rights for %s", ifn); 165 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 166 if (argc > 1) 167 ofp = file(argv[1], "w"); 168 else 169 cap_rights_set(&rights, CAP_IOCTL); 170 if (caph_rights_limit(fileno(ofp), &rights) < 0) { 171 err(1, "unable to limit rights for %s", 172 argc > 1 ? argv[1] : "stdout"); 173 } 174 if (cap_rights_is_set(&rights, CAP_IOCTL)) { 175 unsigned long cmd; 176 177 cmd = TIOCGETA; /* required by isatty(3) in printf(3) */ 178 179 if (caph_ioctls_limit(fileno(ofp), &cmd, 1) < 0) { 180 err(1, "unable to limit ioctls for %s", 181 argc > 1 ? argv[1] : "stdout"); 182 } 183 } 184 185 caph_cache_catpages(); 186 if (caph_enter() < 0) 187 err(1, "unable to enter capability mode"); 188 189 prevbuflen = thisbuflen = 0; 190 prevline = thisline = NULL; 191 192 if (getline(&prevline, &prevbuflen, ifp) < 0) { 193 if (ferror(ifp)) 194 err(1, "%s", ifn); 195 exit(0); 196 } 197 tprev = convert(prevline); 198 199 tthis = NULL; 200 while (getline(&thisline, &thisbuflen, ifp) >= 0) { 201 if (tthis != NULL) 202 free(tthis); 203 tthis = convert(thisline); 204 205 if (tthis == NULL && tprev == NULL) 206 comp = inlcmp(thisline, prevline); 207 else if (tthis == NULL || tprev == NULL) 208 comp = 1; 209 else 210 comp = wcscoll(tthis, tprev); 211 212 if (comp) { 213 /* If different, print; set previous to new value. */ 214 if (Dflag == DF_POSTSEP && repeats > 0) 215 fputc('\n', ofp); 216 if (!Dflag) 217 show(ofp, prevline); 218 p = prevline; 219 b1 = prevbuflen; 220 prevline = thisline; 221 prevbuflen = thisbuflen; 222 if (tprev != NULL) 223 free(tprev); 224 tprev = tthis; 225 thisline = p; 226 thisbuflen = b1; 227 tthis = NULL; 228 repeats = 0; 229 } else { 230 if (Dflag) { 231 if (repeats == 0) { 232 if (Dflag == DF_PRESEP) 233 fputc('\n', ofp); 234 show(ofp, prevline); 235 } 236 show(ofp, thisline); 237 } 238 ++repeats; 239 } 240 } 241 if (ferror(ifp)) 242 err(1, "%s", ifn); 243 if (!Dflag) 244 show(ofp, prevline); 245 exit(0); 246 } 247 248 static wchar_t * 249 convert(const char *str) 250 { 251 size_t n; 252 wchar_t *buf, *ret, *p; 253 254 if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) 255 return (NULL); 256 if (SIZE_MAX / sizeof(*buf) < n + 1) 257 errx(1, "conversion buffer length overflow"); 258 if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) 259 err(1, "malloc"); 260 if (mbstowcs(buf, str, n + 1) != n) 261 errx(1, "internal mbstowcs() error"); 262 /* The last line may not end with \n. */ 263 if (n > 0 && buf[n - 1] == L'\n') 264 buf[n - 1] = L'\0'; 265 266 /* If requested get the chosen fields + character offsets. */ 267 if (numfields || numchars) { 268 if ((ret = wcsdup(skip(buf))) == NULL) 269 err(1, "wcsdup"); 270 free(buf); 271 } else 272 ret = buf; 273 274 if (iflag) { 275 for (p = ret; *p != L'\0'; p++) 276 *p = towlower(*p); 277 } 278 279 return (ret); 280 } 281 282 static int 283 inlcmp(const char *s1, const char *s2) 284 { 285 int c1, c2; 286 287 while (*s1 == *s2++) 288 if (*s1++ == '\0') 289 return (0); 290 c1 = (unsigned char)*s1; 291 c2 = (unsigned char)*(s2 - 1); 292 /* The last line may not end with \n. */ 293 if (c1 == '\n') 294 c1 = '\0'; 295 if (c2 == '\n') 296 c2 = '\0'; 297 return (c1 - c2); 298 } 299 300 /* 301 * show -- 302 * Output a line depending on the flags and number of repetitions 303 * of the line. 304 */ 305 static void 306 show(FILE *ofp, const char *str) 307 { 308 309 if ((!Dflag && dflag && repeats == 0) || (uflag && repeats > 0)) 310 return; 311 if (cflag) 312 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 313 else 314 (void)fprintf(ofp, "%s", str); 315 } 316 317 static wchar_t * 318 skip(wchar_t *str) 319 { 320 int nchars, nfields; 321 322 for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { 323 while (iswblank(*str)) 324 str++; 325 while (*str != L'\0' && !iswblank(*str)) 326 str++; 327 } 328 for (nchars = numchars; nchars-- && *str != L'\0'; ++str) 329 ; 330 return(str); 331 } 332 333 static FILE * 334 file(const char *name, const char *mode) 335 { 336 FILE *fp; 337 338 if ((fp = fopen(name, mode)) == NULL) 339 err(1, "%s", name); 340 return(fp); 341 } 342 343 static void 344 obsolete(char *argv[]) 345 { 346 int len; 347 char *ap, *p, *start; 348 349 while ((ap = *++argv)) { 350 /* Return if "--" or not an option of any form. */ 351 if (ap[0] != '-') { 352 if (ap[0] != '+') 353 return; 354 } else if (ap[1] == '-') 355 return; 356 if (!isdigit((unsigned char)ap[1])) 357 continue; 358 /* 359 * Digit signifies an old-style option. Malloc space for dash, 360 * new option and argument. 361 */ 362 len = strlen(ap); 363 if ((start = p = malloc(len + 3)) == NULL) 364 err(1, "malloc"); 365 *p++ = '-'; 366 *p++ = ap[0] == '+' ? 's' : 'f'; 367 (void)strcpy(p, ap + 1); 368 *argv = start; 369 } 370 } 371 372 static void 373 usage(void) 374 { 375 (void)fprintf(stderr, 376 "usage: uniq [-c | -d | -D | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); 377 exit(1); 378 } 379