xref: /freebsd/usr.bin/comm/comm.c (revision 13014ca04aad1931d41958b56f71a2c65b9a7a2c)
1 /*
2  * Copyright (c) 1989, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Case Larsen.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static const char copyright[] =
39 "@(#) Copyright (c) 1989, 1993, 1994\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif
42 
43 #if 0
44 #ifndef lint
45 static char sccsid[] = "From: @(#)comm.c	8.4 (Berkeley) 5/4/95";
46 #endif
47 #endif
48 
49 #include <sys/cdefs.h>
50 __FBSDID("$FreeBSD$");
51 
52 #include <err.h>
53 #include <limits.h>
54 #include <locale.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <wchar.h>
60 #include <wctype.h>
61 
/* Initial capacity, in wide characters, of each line buffer. */
#define	MAXLINELEN	(LINE_MAX + 1)

/*
 * Column prefixes: the Nth column that is actually printed is indented
 * by N-1 tabs.
 */
const wchar_t *tabs[] = {
	L"",
	L"\t",
	L"\t\t"
};

/* Open an input operand ("-" selects stdin). */
FILE	*file(const char *name);
/* Read one line of wide characters into a growable buffer. */
wchar_t	*getline(wchar_t *buf, size_t *buflen, FILE *fp);
/* Print the current line and everything that follows it in fp. */
void	 show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf,
	    size_t *buflen);
/* Case-insensitive collation of two wide strings. */
int	 wcsicoll(const wchar_t *s1, const wchar_t *s2);
/* Print the usage message and exit. */
static void	 usage(void);
71 
72 int
73 main(int argc, char *argv[])
74 {
75 	int comp, read1, read2;
76 	int ch, flag1, flag2, flag3, iflag;
77 	FILE *fp1, *fp2;
78 	const wchar_t *col1, *col2, *col3;
79 	size_t line1len, line2len;
80 	wchar_t *line1, *line2;
81 	const wchar_t **p;
82 
83 	flag1 = flag2 = flag3 = 1;
84 	iflag = 0;
85 
86  	line1len = MAXLINELEN;
87  	line2len = MAXLINELEN;
88  	line1 = malloc(line1len * sizeof(*line1));
89  	line2 = malloc(line2len * sizeof(*line2));
90 	if (line1 == NULL || line2 == NULL)
91 		err(1, "malloc");
92 
93 	(void) setlocale(LC_ALL, "");
94 
95 	while ((ch = getopt(argc, argv, "123i")) != -1)
96 		switch(ch) {
97 		case '1':
98 			flag1 = 0;
99 			break;
100 		case '2':
101 			flag2 = 0;
102 			break;
103 		case '3':
104 			flag3 = 0;
105 			break;
106 		case 'i':
107 			iflag = 1;
108 			break;
109 		case '?':
110 		default:
111 			usage();
112 		}
113 	argc -= optind;
114 	argv += optind;
115 
116 	if (argc != 2)
117 		usage();
118 
119 	fp1 = file(argv[0]);
120 	fp2 = file(argv[1]);
121 
122 	/* for each column printed, add another tab offset */
123 	p = tabs;
124 	col1 = col2 = col3 = NULL;
125 	if (flag1)
126 		col1 = *p++;
127 	if (flag2)
128 		col2 = *p++;
129 	if (flag3)
130 		col3 = *p;
131 
132 	for (read1 = read2 = 1;;) {
133 		/* read next line, check for EOF */
134 		if (read1) {
135 			line1 = getline(line1, &line1len, fp1);
136 			if (line1 == NULL && ferror(fp1))
137 				err(1, "%s", argv[0]);
138 		}
139 		if (read2) {
140 			line2 = getline(line2, &line2len, fp2);
141 			if (line2 == NULL && ferror(fp2))
142 				err(1, "%s", argv[1]);
143 		}
144 
145 		/* if one file done, display the rest of the other file */
146 		if (line1 == NULL) {
147 			if (line2 != NULL && col2 != NULL)
148 				show(fp2, argv[1], col2, line2, &line2len);
149 			break;
150 		}
151 		if (line2 == NULL) {
152 			if (line1 != NULL && col1 != NULL)
153 				show(fp1, argv[0], col1, line1, &line1len);
154 			break;
155 		}
156 
157 		/* lines are the same */
158 		if(iflag)
159 			comp = wcsicoll(line1, line2);
160 		else
161 			comp = wcscoll(line1, line2);
162 
163 		if (!comp) {
164 			read1 = read2 = 1;
165 			if (col3 != NULL)
166 				(void)printf("%ls%ls", col3, line1);
167 			continue;
168 		}
169 
170 		/* lines are different */
171 		if (comp < 0) {
172 			read1 = 1;
173 			read2 = 0;
174 			if (col1 != NULL)
175 				(void)printf("%ls%ls", col1, line1);
176 		} else {
177 			read1 = 0;
178 			read2 = 1;
179 			if (col2 != NULL)
180 				(void)printf("%ls%ls", col2, line2);
181 		}
182 	}
183 	exit(0);
184 }
185 
/*
 * Read the next line from fp into buf as a NUL-terminated wide string.
 *
 * buf is a heap buffer of *buflen wide characters (buf may be NULL when
 * *buflen is 0).  It is grown by doubling as needed; the possibly moved
 * buffer is returned and *buflen is updated to its new capacity.
 *
 * The returned line always ends in a newline: if the input ends without
 * one, a newline is supplied so that the final line compares and prints
 * like every other line.
 *
 * Returns NULL, leaving buf untouched, when no character could be read
 * (end of file or read error; the caller tells them apart with ferror()).
 * Running out of memory is fatal rather than being reported as NULL,
 * because callers treat NULL without ferror() as a normal end of file.
 */
wchar_t *
getline(wchar_t *buf, size_t *buflen, FILE *fp)
{
	wchar_t *grown;
	size_t bufpos, newlen;
	wint_t ch;

	bufpos = 0;
	do {
		ch = getwc(fp);
		if (ch == WEOF) {
			if (bufpos == 0)
				return (NULL);
			/* Unterminated final line: close it ourselves. */
			ch = L'\n';
		}

		/* Make room for this character and the terminating NUL. */
		while (bufpos + 2 > *buflen) {
			if (*buflen > (size_t)-1 / (2 * sizeof(*buf)))
				errx(1, "line too long");
			newlen = (*buflen == 0) ? 2 : *buflen * 2;
			grown = realloc(buf, newlen * sizeof(*buf));
			if (grown == NULL)
				err(1, "realloc");
			buf = grown;
			*buflen = newlen;
		}
		buf[bufpos++] = (wchar_t)ch;
	} while (ch != L'\n');
	buf[bufpos] = L'\0';

	return (buf);
}
209 
/*
 * Print the line already held in buf, then every remaining line of fp,
 * each prefixed with the column offset.  A read error on fp is fatal and
 * is reported against the file name fn.
 */
void
show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen)
{

	for (;;) {
		(void)printf("%ls%ls", offset, buf);
		buf = getline(buf, buflen, fp);
		if (buf == NULL)
			break;
	}

	/* NULL means either end of file or an error; only the latter is fatal. */
	if (ferror(fp))
		err(1, "%s", fn);
}
220 
/*
 * Return the input stream for the operand name: stdin for "-", otherwise
 * the named file opened for reading.  Failure to open is fatal.
 */
FILE *
file(const char *name)
{
	FILE *stream;

	if (strcmp(name, "-") == 0) {
		stream = stdin;
	} else {
		stream = fopen(name, "r");
		if (stream == NULL)
			err(1, "%s", name);
	}
	return (stream);
}
233 
/* Write the synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	(void)fputs("usage: comm [-123i] file1 file2\n", stderr);
	exit(1);
}
240 
/*
 * Scratch buffers holding the lower-cased copies of the two operands of
 * wcsicoll().  They are kept between calls and only ever grow.
 */
static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;

/*
 * Store a lower-cased, NUL-terminated copy of s in *bufp, growing the
 * buffer (capacity *buflenp, in wide characters) by doubling when it is
 * too small.  Allocation failure is fatal.
 */
static void
wcsicoll_fold(wchar_t **bufp, size_t *buflenp, const wchar_t *s)
{
	wchar_t *grown, *p;
	size_t need, newlen;

	need = wcslen(s) + 1;
	if (need > *buflenp) {
		newlen = (*buflenp == 0) ? need : *buflenp;
		while (newlen < need) {
			if (newlen > (size_t)-1 / (2 * sizeof(**bufp)))
				errx(1, "line too long");
			newlen *= 2;
		}
		grown = realloc(*bufp, newlen * sizeof(**bufp));
		if (grown == NULL)
			err(1, "realloc");
		*bufp = grown;
		*buflenp = newlen;
	}

	for (p = *bufp; *s != L'\0'; s++)
		*p++ = towlower(*s);
	*p = L'\0';
}

/*
 * Compare s1 and s2 with wcscoll() after converting both to lower case.
 * Returns a value less than, equal to or greater than zero, as wcscoll()
 * does.  The inputs are not modified.
 */
int
wcsicoll(const wchar_t *s1, const wchar_t *s2)
{

	wcsicoll_fold(&wcsicoll_l1_buf, &wcsicoll_l1_buflen, s1);
	wcsicoll_fold(&wcsicoll_l2_buf, &wcsicoll_l2_buflen, s2);

	return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
}
289