1 /*
2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /***********************************************************************
6 * *
7 * This software is part of the ast package *
8 * Copyright (c) 1992-2012 AT&T Intellectual Property *
9 * and is licensed under the *
10 * Eclipse Public License, Version 1.0 *
11 * by AT&T Intellectual Property *
12 * *
13 * A copy of the License is available at *
14 * http://www.eclipse.org/org/documents/epl-v10.html *
15 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
16 * *
17 * Information and Software Systems Research *
18 * AT&T Research *
19 * Florham Park NJ *
20 * *
21 * Glenn Fowler <gsf@research.att.com> *
22 * David Korn <dgk@research.att.com> *
23 * *
24 ***********************************************************************/
25 #pragma prototyped
26 /*
27 * David Korn
28 * Glenn Fowler
29 * AT&T Bell Laboratories
30 *
31 * cmp
32 */
33
34 static const char usage[] =
35 "[-?\n@(#)$Id: cmp (AT&T Research) 2010-04-11 $\n]"
36 USAGE_LICENSE
37 "[+NAME?cmp - compare two files]"
38 "[+DESCRIPTION?\bcmp\b compares two files \afile1\a and \afile2\a. "
39 "\bcmp\b writes no output if the files are the same. By default, if the "
40 "files differ, the byte and line number at which the first difference "
41 "occurred are written to standard output. Bytes and lines are numbered "
42 "beginning with 1.]"
43 "[+?If \askip1\a or \askip2\a are specified, or the \b-i\b option is "
44 "specified, initial bytes of the corresponding file are skipped before "
45 "beginning the compare. The skip values are in bytes or can have a "
46 "suffix of \bk\b for kilobytes or \bm\b for megabytes.]"
47 "[+?If either \afile1\a or \afiles2\a is \b-\b, \bcmp\b uses standard "
48 "input starting at the current location.]"
49 "[b:print-bytes?Print differing bytes as 3 digit octal values.]"
50 "[c:print-chars?Print differing bytes as follows: non-space printable "
51 "characters as themselves; space and control characters as \b^\b "
52 "followed by a letter of the alphabet; and characters with the high bit "
53 "set as the lower 7 bit character prefixed by \bM^\b for 7 bit space and "
54 "non-printable characters and \bM-\b for all other characters. If the 7 "
55 "bit character encoding is not ASCII then the characters are converted "
56 "to ASCII to determine \ahigh bit set\a, and if set it is cleared and "
57 "converted back to the native encoding. Multibyte characters in the "
58 "current locale are treated as printable characters.]"
59 "[d:differences?Print at most \adifferences\a differences using "
60 "\b--verbose\b output format. \b--differences=0\b is equivalent to "
61 "\b--silent\b.]#[differences]"
62 "[i:ignore-initial|skip?Skip the the first \askip1\a bytes in \afile1\a "
63 "and the first \askip2\a bytes in \afile2\a. If \askip2\a is omitted "
64 "then \askip1\a is used.]:[skip1[::skip2]]:=0::0]"
65 "[l:verbose?Write the decimal byte number and the differing bytes (in "
66 "octal) for each difference.]"
67 "[n:count|bytes?Compare at most \acount\a bytes.]#[count]"
68 "[s:quiet|silent?Write nothing for differing files; return non-zero exit "
69 "status only.]"
70 "\n"
71 "\nfile1 file2 [skip1 [skip2]]\n"
72 "\n"
73 "[+EXIT STATUS?]"
74 "{"
75 "[+0?The files or portions compared are identical.]"
76 "[+1?The files are different.]"
77 "[+>1?An error occurred.]"
78 "}"
79 "[+SEE ALSO?\bcomm\b(1), \bdiff\b(1), \bcat\b(1)]"
80 ;
81
82 #include <cmd.h>
83 #include <ls.h>
84 #include <ctype.h>
85 #include <ccode.h>
86
/* Output-mode bits carried in the `flags' argument of cmp() and pretty(). */
#define CMP_VERBOSE	(1 << 0)	/* set by -l and -d: list each differing byte	*/
#define CMP_SILENT	(1 << 1)	/* set by -s: no output for differing files	*/
#define CMP_CHARS	(1 << 2)	/* set by -c: show differing bytes as chars	*/
#define CMP_BYTES	(1 << 3)	/* set by -b: show differing bytes in octal	*/
91
92 static void
pretty(Sfio_t * out,int o,int delim,int flags)93 pretty(Sfio_t *out, int o, int delim, int flags)
94 {
95 int c;
96 int m;
97 char* s;
98 char buf[10];
99
100 s = buf;
101 if ((flags & CMP_BYTES) || !(flags & CMP_CHARS))
102 {
103 *s++ = ' ';
104 if ((flags & CMP_CHARS) && delim != -1)
105 *s++ = ' ';
106 *s++ = '0' + ((o >> 6) & 07);
107 *s++ = '0' + ((o >> 3) & 07);
108 *s++ = '0' + (o & 07);
109 }
110 if (flags & CMP_CHARS)
111 {
112 *s++ = ' ';
113 c = ccmapc(o, CC_NATIVE, CC_ASCII);
114 if (c & 0x80)
115 {
116 m = 1;
117 *s++ = 'M';
118 c &= 0x7f;
119 o = ccmapc(c, CC_ASCII, CC_NATIVE);
120 }
121 else
122 m = 0;
123 if (isspace(o) || !isprint(o))
124 {
125 if (!m)
126 *s++ = ' ';
127 *s++ = '^';
128 c ^= 0x40;
129 o = ccmapc(c, CC_ASCII, CC_NATIVE);
130 }
131 else if (m)
132 *s++ = '-';
133 else
134 {
135 *s++ = ' ';
136 *s++ = ' ';
137 }
138 *s++ = o;
139 }
140 *s = 0;
141 sfputr(out, buf, delim);
142 }
143
144 /*
145 * compare two files
146 */
147
148 static int
cmp(const char * file1,Sfio_t * f1,const char * file2,Sfio_t * f2,int flags,Sfoff_t count,Sfoff_t differences)149 cmp(const char* file1, Sfio_t* f1, const char* file2, Sfio_t* f2, int flags, Sfoff_t count, Sfoff_t differences)
150 {
151 register int c1;
152 register int c2;
153 register unsigned char* p1 = 0;
154 register unsigned char* p2 = 0;
155 register Sfoff_t lines = 1;
156 register unsigned char* e1 = 0;
157 register unsigned char* e2 = 0;
158 Sfoff_t pos = 0;
159 int n1 = 0;
160 int ret = 0;
161 unsigned char* last;
162
163 for (;;)
164 {
165 if ((c1 = e1 - p1) <= 0)
166 {
167 if (count > 0 && !(count -= n1))
168 return ret;
169 if (!(p1 = (unsigned char*)sfreserve(f1, SF_UNBOUND, 0)) || (c1 = sfvalue(f1)) <= 0)
170 {
171 if (sferror(f1)) {
172 error(ERROR_exit(2),
173 "read error on %s", file1);
174 }
175 if ((e2 - p2) > 0 || sfreserve(f2, SF_UNBOUND, 0) && sfvalue(f2) > 0)
176 {
177 ret = 1;
178 if (!(flags & CMP_SILENT))
179 error(ERROR_exit(1), "EOF on %s", file1);
180 }
181 if (sferror(f2)) {
182 error(ERROR_exit(2),
183 "read error on %s", file2);
184 }
185 return ret;
186 }
187 if (count > 0 && c1 > count)
188 c1 = (int)count;
189 e1 = p1 + c1;
190 n1 = c1;
191 }
192 if ((c2 = e2 - p2) <= 0)
193 {
194 if (!(p2 = (unsigned char*)sfreserve(f2, SF_UNBOUND, 0)) || (c2 = sfvalue(f2)) <= 0)
195 {
196 if (sferror(f2)) {
197 error(ERROR_exit(2),
198 "read error on %s", file2);
199 }
200 if (!(flags & CMP_SILENT))
201 error(ERROR_exit(1), "EOF on %s", file2);
202 return 1;
203 }
204 e2 = p2 + c2;
205 }
206 if (c1 > c2)
207 c1 = c2;
208 pos += c1;
209 if (flags & CMP_SILENT)
210 {
211 if (memcmp(p1, p2, c1))
212 return 1;
213 p1 += c1;
214 p2 += c1;
215 }
216 else
217 {
218 last = p1 + c1;
219 while (p1 < last)
220 {
221 if ((c1 = *p1++) != *p2++)
222 {
223 if (differences >= 0)
224 {
225 if (!differences)
226 return 1;
227 differences--;
228 }
229 #if 0
230 if (!flags)
231 sfprintf(sfstdout, "%s %s differ: char %I*d, line %I*u\n", file1, file2, sizeof(pos), pos - (last - p1), sizeof(lines), lines);
232 else
233 {
234 sfprintf(sfstdout, "%6I*d", sizeof(pos), pos - (last - p1));
235 pretty(sfstdout, c1, -1, flags);
236 pretty(sfstdout, *(p2-1), '\n', flags);
237 }
238 #else
239 if (flags & CMP_VERBOSE)
240 sfprintf(sfstdout, "%6I*d", sizeof(pos), pos - (last - p1));
241 else
242 sfprintf(sfstdout, "%s %s differ: char %I*d, line %I*u", file1, file2, sizeof(pos), pos - (last - p1), sizeof(lines), lines);
243 if (flags & (CMP_BYTES|CMP_CHARS|CMP_VERBOSE))
244 {
245 sfputc(sfstdout, (flags & CMP_VERBOSE) ? ' ' : ',');
246 pretty(sfstdout, c1, -1, flags);
247 pretty(sfstdout, *(p2-1), '\n', flags);
248 }
249 else
250 sfputc(sfstdout, '\n');
251 #endif
252 if (!differences || differences < 0 && !(flags & CMP_VERBOSE))
253 return 1;
254 ret = 1;
255 }
256 if (c1 == '\n')
257 lines++;
258 }
259 }
260 }
261 }
262
263 int
b_cmp(int argc,register char ** argv,Shbltin_t * context)264 b_cmp(int argc, register char** argv, Shbltin_t* context)
265 {
266 char* s;
267 char* e;
268 char* file1;
269 char* file2;
270 int n;
271 struct stat s1;
272 struct stat s2;
273
274 Sfio_t* f1 = 0;
275 Sfio_t* f2 = 0;
276 Sfoff_t o1 = 0;
277 Sfoff_t o2 = 0;
278 Sfoff_t count = -1;
279 Sfoff_t differences = -1;
280 int flags = 0;
281
282 NoP(argc);
283 cmdinit(argc, argv, context, ERROR_CATALOG, 0);
284 for (;;)
285 {
286 switch (optget(argv, usage))
287 {
288 case 'b':
289 flags |= CMP_BYTES;
290 continue;
291 case 'c':
292 flags |= CMP_CHARS;
293 continue;
294 case 'd':
295 flags |= CMP_VERBOSE;
296 differences = opt_info.number;
297 continue;
298 case 'i':
299 o1 = strtoll(opt_info.arg, &e, 0);
300 if (*e == ':')
301 o2 = strtoll(e + 1, &e, 0);
302 else
303 o2 = o1;
304 if (*e)
305 {
306 error(2, "%s: skip1:skip2 expected", opt_info.arg);
307 break;
308 }
309 continue;
310 case 'l':
311 flags |= CMP_VERBOSE;
312 continue;
313 case 'n':
314 count = opt_info.number;
315 continue;
316 case 's':
317 flags |= CMP_SILENT;
318 continue;
319 case ':':
320 error(2, "%s", opt_info.arg);
321 break;
322 case '?':
323 error(ERROR_usage(2), "%s", opt_info.arg);
324 break;
325 }
326 break;
327 }
328 argv += opt_info.index;
329 if (error_info.errors || !(file1 = *argv++) || !(file2 = *argv++))
330 error(ERROR_usage(2), "%s", optusage(NiL));
331 n = 2;
332 if (streq(file1, "-"))
333 f1 = sfstdin;
334 else if (!(f1 = sfopen(NiL, file1, "r")))
335 {
336 if (!(flags & CMP_SILENT))
337 error(ERROR_system(0), "%s: cannot open", file1);
338 goto done;
339 }
340 if (streq(file2, "-"))
341 f2 = sfstdin;
342 else if (!(f2 = sfopen(NiL, file2, "r")))
343 {
344 if (!(flags & CMP_SILENT))
345 error(ERROR_system(0), "%s: cannot open", file2);
346 goto done;
347 }
348 if (s = *argv++)
349 {
350 o1 = strtoll(s, &e, 0);
351 if (*e)
352 {
353 error(ERROR_exit(0), "%s: %s: invalid skip", file1, s);
354 goto done;
355 }
356 if (s = *argv++)
357 {
358 o2 = strtoll(s, &e, 0);
359 if (*e)
360 {
361 error(ERROR_exit(0), "%s: %s: invalid skip", file2, s);
362 goto done;
363 }
364 }
365 if (*argv)
366 {
367 error(ERROR_usage(0), "%s", optusage(NiL));
368 goto done;
369 }
370 }
371 if (o1 && sfseek(f1, o1, SEEK_SET) != o1)
372 {
373 if (!(flags & CMP_SILENT))
374 error(ERROR_exit(0), "EOF on %s", file1);
375 n = 1;
376 goto done;
377 }
378 if (o2 && sfseek(f2, o2, SEEK_SET) != o2)
379 {
380 if (!(flags & CMP_SILENT))
381 error(ERROR_exit(0), "EOF on %s", file2);
382 n = 1;
383 goto done;
384 }
385 if (fstat(sffileno(f1), &s1))
386 error(ERROR_system(0), "%s: cannot stat", file1);
387 else if (fstat(sffileno(f2), &s2))
388 error(ERROR_system(0), "%s: cannot stat", file1);
389 else if (s1.st_ino == s2.st_ino && s1.st_dev == s2.st_dev && o1 == o2)
390 n = 0;
391 else
392 n = ((flags & CMP_SILENT) && S_ISREG(s1.st_mode) && S_ISREG(s2.st_mode) && (s1.st_size - o1) != (s2.st_size - o2)) ? 1 : cmp(file1, f1, file2, f2, flags, count, differences);
393 done:
394 if (f1 && f1 != sfstdin)
395 sfclose(f1);
396 if (f2 && f2 != sfstdin)
397 sfclose(f2);
398 return n;
399 }
400