xref: /freebsd/usr.bin/uniq/uniq.c (revision ef36b3f75658d201edb495068db5e1be49593de5)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Case Larsen.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static const char copyright[] =
35 "@(#) Copyright (c) 1989, 1993\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
42 #endif
43 static const char rcsid[] =
44   "$FreeBSD$";
45 #endif /* not lint */
46 
47 #include <sys/capsicum.h>
48 
49 #include <ctype.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <limits.h>
53 #include <locale.h>
54 #include <nl_types.h>
55 #include <stdint.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <termios.h>
60 #include <unistd.h>
61 #include <wchar.h>
62 #include <wctype.h>
63 
/*
 * Option state filled in by main() while parsing the command line.
 */
static int cflag;	/* -c: show() prefixes each line with its count */
static int dflag;	/* -d: show() drops lines that never repeated */
static int uflag;	/* -u: show() drops lines that did repeat */
static int iflag;	/* -i: convert() lower-cases the comparison key */

static int numchars;	/* -s: characters skip() steps over */
static int numfields;	/* -f: blank-separated fields skip() steps over */
static int repeats;	/* extra copies of the current line seen so far */

static FILE	*file(const char *name, const char *mode);
static wchar_t	*convert(const char *str);
static int	 inlcmp(const char *s1, const char *s2);
static void	 show(FILE *ofp, const char *str);
static wchar_t	*skip(wchar_t *str);
static void	 obsolete(char *argv[]);
static void	 usage(void);
74 
/*
 * strerror_init --
 *	Cache NLS data before entering capability mode by opening the "libc"
 *	message catalog.  The catalog descriptor is deliberately discarded.
 *
 *	XXXPJD: There should be strerror_init() and strsignal_init() in libc.
 */
static void
strerror_init(void)
{
	(void)catopen("libc", NL_CAT_LOCALE);
}
85 
86 int
87 main (int argc, char *argv[])
88 {
89 	wchar_t *tprev, *tthis;
90 	FILE *ifp, *ofp;
91 	int ch, comp;
92 	size_t prevbuflen, thisbuflen, b1;
93 	char *prevline, *thisline, *p;
94 	const char *ifn;
95 	cap_rights_t rights;
96 
97 	(void) setlocale(LC_ALL, "");
98 
99 	obsolete(argv);
100 	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
101 		switch (ch) {
102 		case 'c':
103 			cflag = 1;
104 			break;
105 		case 'd':
106 			dflag = 1;
107 			break;
108 		case 'i':
109 			iflag = 1;
110 			break;
111 		case 'f':
112 			numfields = strtol(optarg, &p, 10);
113 			if (numfields < 0 || *p)
114 				errx(1, "illegal field skip value: %s", optarg);
115 			break;
116 		case 's':
117 			numchars = strtol(optarg, &p, 10);
118 			if (numchars < 0 || *p)
119 				errx(1, "illegal character skip value: %s", optarg);
120 			break;
121 		case 'u':
122 			uflag = 1;
123 			break;
124 		case '?':
125 		default:
126 			usage();
127 		}
128 
129 	argc -= optind;
130 	argv += optind;
131 
132 	if (argc > 2)
133 		usage();
134 
135 	ifp = stdin;
136 	ifn = "stdin";
137 	ofp = stdout;
138 	if (argc > 0 && strcmp(argv[0], "-") != 0)
139 		ifp = file(ifn = argv[0], "r");
140 	cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
141 	if (cap_rights_limit(fileno(ifp), &rights) < 0 && errno != ENOSYS)
142 		err(1, "unable to limit rights for %s", ifn);
143 	cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
144 	if (argc > 1)
145 		ofp = file(argv[1], "w");
146 	else
147 		cap_rights_set(&rights, CAP_IOCTL);
148 	if (cap_rights_limit(fileno(ofp), &rights) < 0 && errno != ENOSYS) {
149 		err(1, "unable to limit rights for %s",
150 		    argc > 1 ? argv[1] : "stdout");
151 	}
152 	if (cap_rights_is_set(&rights, CAP_IOCTL)) {
153 		unsigned long cmd;
154 
155 		cmd = TIOCGETA; /* required by isatty(3) in printf(3) */
156 
157 		if (cap_ioctls_limit(fileno(ofp), &cmd, 1) < 0 &&
158 		    errno != ENOSYS) {
159 			err(1, "unable to limit ioctls for %s",
160 			    argc > 1 ? argv[1] : "stdout");
161 		}
162 	}
163 
164 	strerror_init();
165 	if (cap_enter() < 0 && errno != ENOSYS)
166 		err(1, "unable to enter capability mode");
167 
168 	prevbuflen = thisbuflen = 0;
169 	prevline = thisline = NULL;
170 
171 	if (getline(&prevline, &prevbuflen, ifp) < 0) {
172 		if (ferror(ifp))
173 			err(1, "%s", ifn);
174 		exit(0);
175 	}
176 	tprev = convert(prevline);
177 
178 	tthis = NULL;
179 	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
180 		if (tthis != NULL)
181 			free(tthis);
182 		tthis = convert(thisline);
183 
184 		if (tthis == NULL && tprev == NULL)
185 			comp = inlcmp(thisline, prevline);
186 		else if (tthis == NULL || tprev == NULL)
187 			comp = 1;
188 		else
189 			comp = wcscoll(tthis, tprev);
190 
191 		if (comp) {
192 			/* If different, print; set previous to new value. */
193 			show(ofp, prevline);
194 			p = prevline;
195 			b1 = prevbuflen;
196 			prevline = thisline;
197 			prevbuflen = thisbuflen;
198 			if (tprev != NULL)
199 				free(tprev);
200 			tprev = tthis;
201 			thisline = p;
202 			thisbuflen = b1;
203 			tthis = NULL;
204 			repeats = 0;
205 		} else
206 			++repeats;
207 	}
208 	if (ferror(ifp))
209 		err(1, "%s", ifn);
210 	show(ofp, prevline);
211 	exit(0);
212 }
213 
214 static wchar_t *
215 convert(const char *str)
216 {
217 	size_t n;
218 	wchar_t *buf, *ret, *p;
219 
220 	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
221 		return (NULL);
222 	if (SIZE_MAX / sizeof(*buf) < n + 1)
223 		errx(1, "conversion buffer length overflow");
224 	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
225 		err(1, "malloc");
226 	if (mbstowcs(buf, str, n + 1) != n)
227 		errx(1, "internal mbstowcs() error");
228 	/* The last line may not end with \n. */
229 	if (n > 0 && buf[n - 1] == L'\n')
230 		buf[n - 1] = L'\0';
231 
232 	/* If requested get the chosen fields + character offsets. */
233 	if (numfields || numchars) {
234 		if ((ret = wcsdup(skip(buf))) == NULL)
235 			err(1, "wcsdup");
236 		free(buf);
237 	} else
238 		ret = buf;
239 
240 	if (iflag) {
241 		for (p = ret; *p != L'\0'; p++)
242 			*p = towlower(*p);
243 	}
244 
245 	return (ret);
246 }
247 
/*
 * inlcmp --
 *	Byte-wise comparison of two lines in which a newline at the first
 *	point of difference counts as the end of the string.
 *
 *	Returns 0 when the lines are equal, otherwise the difference of the
 *	first mismatching bytes taken as unsigned char values.
 */
static int
inlcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;
	int lhs, rhs;

	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}

	/* The last line may not end with \n. */
	lhs = (*a == '\n') ? '\0' : *a;
	rhs = (*b == '\n') ? '\0' : *b;
	return (lhs - rhs);
}
265 
266 /*
267  * show --
268  *	Output a line depending on the flags and number of repetitions
269  *	of the line.
270  */
271 static void
272 show(FILE *ofp, const char *str)
273 {
274 
275 	if ((dflag && repeats == 0) || (uflag && repeats > 0))
276 		return;
277 	if (cflag)
278 		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
279 	else
280 		(void)fprintf(ofp, "%s", str);
281 }
282 
283 static wchar_t *
284 skip(wchar_t *str)
285 {
286 	int nchars, nfields;
287 
288 	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
289 		while (iswblank(*str))
290 			str++;
291 		while (*str != L'\0' && !iswblank(*str))
292 			str++;
293 	}
294 	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
295 		;
296 	return(str);
297 }
298 
/*
 * file --
 *	fopen() wrapper: returns the opened stream, or terminates the program
 *	with a diagnostic naming `name' when the open fails.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
308 
/*
 * obsolete --
 *	Rewrite old-style numeric options in place: "-N" becomes "-fN" and
 *	"+N" becomes "-sN".  Scanning starts after argv[0] and stops at the
 *	first argument that begins with "--" or with neither '-' nor '+'.
 *	Replacement strings are malloc'd and never freed.
 */
static void
obsolete(char *argv[])
{
	char **slot;
	char *arg, *repl;
	int arglen;

	for (slot = argv + 1; (arg = *slot) != NULL; slot++) {
		/* Return if "--" or not an option of any form. */
		if (arg[0] == '-') {
			if (arg[1] == '-')
				return;
		} else if (arg[0] != '+')
			return;

		/* Only a digit right after the sign marks an old-style option. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/* Room for the dash, the option letter, the digits and the NUL. */
		arglen = strlen(arg);
		repl = malloc(arglen + 3);
		if (repl == NULL)
			err(1, "malloc");
		repl[0] = '-';
		repl[1] = (arg[0] == '+') ? 's' : 'f';
		(void)strcpy(repl + 2, arg + 1);
		*slot = repl;
	}
}
337 
/*
 * usage --
 *	Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: uniq [-c] [-d | -u] [-i] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
345