xref: /freebsd/usr.bin/sdiff/sdiff.c (revision 079171874c9bf263b69e3af10784ad2bcd1fe699)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 __FBSDID("$FreeBSD$");
10 
11 #include <sys/param.h>
12 #include <sys/queue.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 
17 #include <ctype.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <stdint.h>
25 #define _WITH_GETLINE
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "common.h"
32 #include "extern.h"
33 
34 #define DIFF_PATH	"/usr/bin/diff"
35 
36 #define WIDTH 126
37 /*
38  * Each column must be at least one character wide, plus three
39  * characters between the columns (space, [<|>], space).
40  */
41 #define WIDTH_MIN 5
42 
/* Upper bound on the leading bytes istextfile() scans for a NUL byte */
44 #define MAX_CHECK 768
45 
/*
 * A single diff line: one row of the side-by-side output.  Entries are
 * linked onto the diffhead tail queue by enqueue() and released by
 * freediff(), which frees both strings along with the node.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* tail-queue linkage */
	char	*left;		/* left column text; NULL means empty column */
	char	 div;		/* divider character printed between columns */
	char	*right;		/* right column text; NULL means empty column */
};
53 
54 static void astrcat(char **, const char *);
55 static void enqueue(char *, char, char *);
56 static char *mktmpcpy(const char *);
57 static int istextfile(FILE *);
58 static void binexec(char *, char *, char *) __dead2;
59 static void freediff(struct diffline *);
60 static void int_usage(void);
61 static int parsecmd(FILE *, FILE *, FILE *);
62 static void printa(FILE *, size_t);
63 static void printc(FILE *, size_t, FILE *, size_t);
64 static void printcol(const char *, size_t *, const size_t);
65 static void printd(FILE *, size_t);
66 static void println(const char *, const char, const char *);
67 static void processq(void);
68 static void prompt(const char *, const char *);
69 static void usage(void) __dead2;
70 static char *xfgets(FILE *);
71 
/* Queue of diff lines filled by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* line number of file1 and file2 */
static int Iflag = 0;	/* set when -I (ignore lines matching regexp) is given */
static int	lflag;		/* print only left column for identical lines */
static int	sflag;		/* skip identical lines */
/*
 * The two objects below have external linkage.
 * NOTE(review): presumably they are shared with other sdiff source
 * files through extern.h -- confirm before making them static.
 */
FILE *outfp;		/* file to save changes to */
const char *tmpdir;	/* TMPDIR or /tmp */
81 
/*
 * Codes returned by getopt_long() for options that have no
 * single-character form.  Numbering starts at CHAR_MAX + 1 so that no
 * code can be mistaken for an option letter.  Several of these codes
 * are not referenced by the option table in this file.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	FROMFILE_OPT,
	TOFILE_OPT,
	UNIDIR_OPT,
	STRIPCR_OPT,
	HORIZ_OPT,
	LEFTC_OPT,
	SUPCL_OPT,
	LF_OPT,
	/* the following groupings must be in sequence */
	OLDGF_OPT,
	NEWGF_OPT,
	UNCGF_OPT,
	CHGF_OPT,
	OLDLF_OPT,
	NEWLF_OPT,
	UNCLF_OPT,
	/* end order-sensitive enums */
	TSIZE_OPT,
	HLINES_OPT,
	LFILES_OPT,
	DIFFPROG_OPT,
	PIPE_FD,
	/* pid from the diff parent (if applicable) */
	DIFF_PID,

	NOOP_OPT,
};
114 
115 static struct option longopts[] = {
116 	/* options only processed in sdiff */
117 	{ "left-column",		no_argument,		NULL,	LEFTC_OPT },
118 	{ "suppress-common-lines",	no_argument,		NULL,	's' },
119 	{ "width",			required_argument,	NULL,	'w' },
120 
121 	{ "output",			required_argument,	NULL,	'o' },
122 	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },
123 
124 	{ "pipe-fd",			required_argument,	NULL,	PIPE_FD },
125 	{ "diff-pid",			required_argument,	NULL,	DIFF_PID },
126 	/* Options processed by diff. */
127 	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
128 	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
129 	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
130 	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
131 	{ "help",			no_argument,		NULL,	HELP_OPT },
132 	{ "text",			no_argument,		NULL,	'a' },
133 	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
134 	{ "ignore-space-change",	no_argument,		NULL,	'b' },
135 	{ "minimal",			no_argument,		NULL,	'd' },
136 	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
137 	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
138 	{ "ignore-case",		no_argument,		NULL,	'i' },
139 	{ "expand-tabs",		no_argument,		NULL,	't' },
140 	{ "speed-large-files",		no_argument,		NULL,	'H' },
141 	{ "ignore-all-space",		no_argument,		NULL,	'W' },
142 
143 	{ NULL,				0,			NULL,	'\0'}
144 };
145 
/*
 * Text printed for --help, one array element per output line.  The
 * array is NULL-terminated.  The long option names shown here must
 * match the entries of the longopts table.
 */
static const char *help_msg[] = {
	"\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"\t-l, --left-column, Only print the left column for identical lines.",
	"\t-o OUTFILE, --output=OUTFILE, Interactively merge file1 and file2 into outfile.",
	"\t-s, --suppress-common-lines, Skip identical lines.",
	"\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.",
	"\tOptions passed to diff(1) are:",
	"\t\t-a, --text, Treat file1 and file2 as text files.",
	"\t\t-b, --ignore-space-change, Ignore trailing blank spaces.",
	"\t\t-d, --minimal, Minimize diff size.",
	"\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.",
	"\t\t-i, --ignore-case, Do a case-insensitive comparison.",
	"\t\t-t, --expand-tabs, Expand tabs to spaces.",
	"\t\t-W, --ignore-all-space, Ignore all spaces.",
	"\t\t--speed-large-files, Assume large file with scattered changes.",
	"\t\t--strip-trailing-cr, Strip trailing carriage return.",
	"\t\t--ignore-file-name-case, Ignore case of file names.",
	"\t\t--no-ignore-file-name-case, Do not ignore file name case",
	"\t\t--tabsize NUM, Change size of tabs (default 8.)",

	NULL,
};
168 
169 /*
170  * Create temporary file if source_file is not a regular file.
171  * Returns temporary file name if one was malloced, NULL if unnecessary.
172  */
173 static char *
174 mktmpcpy(const char *source_file)
175 {
176 	struct stat sb;
177 	ssize_t rcount;
178 	int ifd, ofd;
179 	u_char buf[BUFSIZ];
180 	char *target_file;
181 
182 	/* Open input and output. */
183 	ifd = open(source_file, O_RDONLY, 0);
184 	/* File was opened successfully. */
185 	if (ifd != -1) {
186 		if (fstat(ifd, &sb) == -1)
187 			err(2, "error getting file status from %s", source_file);
188 
189 		/* Regular file. */
190 		if (S_ISREG(sb.st_mode)) {
191 			close(ifd);
192 			return (NULL);
193 		}
194 	} else {
195 		/* If ``-'' does not exist the user meant stdin. */
196 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
197 			ifd = STDIN_FILENO;
198 		else
199 			err(2, "error opening %s", source_file);
200 	}
201 
202 	/* Not a regular file, so copy input into temporary file. */
203 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
204 		err(2, "asprintf");
205 	if ((ofd = mkstemp(target_file)) == -1) {
206 		warn("error opening %s", target_file);
207 		goto FAIL;
208 	}
209 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
210 	    rcount != 0) {
211 		ssize_t wcount;
212 
213 		wcount = write(ofd, buf, (size_t)rcount);
214 		if (-1 == wcount || rcount != wcount) {
215 			warn("error writing to %s", target_file);
216 			goto FAIL;
217 		}
218 	}
219 	if (rcount == -1) {
220 		warn("error reading from %s", source_file);
221 		goto FAIL;
222 	}
223 
224 	close(ifd);
225 	close(ofd);
226 
227 	return (target_file);
228 
229 FAIL:
230 	unlink(target_file);
231 	exit(2);
232 }
233 
234 int
235 main(int argc, char **argv)
236 {
237 	FILE *diffpipe=NULL, *file1, *file2;
238 	size_t diffargc = 0, wflag = WIDTH;
239 	int ch, fd[2] = {-1}, status;
240 	pid_t pid=0; pid_t ppid =-1;
241 	const char *outfile = NULL;
242 	struct option *popt;
243 	char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
244 	     *tmp1, *tmp2, *s1, *s2;
245 	int i;
246 
247 	/*
248 	 * Process diff flags.
249 	 */
250 	/*
251 	 * Allocate memory for diff arguments and NULL.
252 	 * Each flag has at most one argument, so doubling argc gives an
253 	 * upper limit of how many diff args can be passed.  argv[0],
254 	 * file1, and file2 won't have arguments so doubling them will
255 	 * waste some memory; however we need an extra space for the
256 	 * NULL at the end, so it sort of works out.
257 	 */
258 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
259 		err(2, "main");
260 
261 	/* Add first argument, the program name. */
262 	diffargv[diffargc++] = diffprog;
263 
264 	/* create a dynamic string for merging single-switch options */
265 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
266 		err(2, "main");
267 
268 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
269 	    longopts, NULL)) != -1) {
270 		const char *errstr;
271 
272 		switch (ch) {
273 		/* only compatible --long-name-form with diff */
274 		case FCASE_IGNORE_OPT:
275 		case FCASE_SENSITIVE_OPT:
276 		case STRIPCR_OPT:
277 		case TSIZE_OPT:
278 		case 'S':
279 		break;
280 		/* combine no-arg single switches */
281 		case 'a':
282 		case 'B':
283 		case 'b':
284 		case 'd':
285 		case 'E':
286 		case 'i':
287 		case 't':
288 		case 'H':
289 		case 'W':
290 			for(popt = longopts; ch != popt->val && popt->name != NULL; popt++);
291 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
292 			/*
293 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
294 			 */
295 			if (ch == 'W')
296 				sprintf(diffargv[1], "%sw", diffargv[1]);
297 			else
298 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
299 			break;
300 		case DIFFPROG_OPT:
301 			diffargv[0] = diffprog = optarg;
302 			break;
303 		case 'I':
304 			Iflag = 1;
305 			diffargv[diffargc++] = "-I";
306 			diffargv[diffargc++] = optarg;
307 			break;
308 		case 'l':
309 			lflag = 1;
310 			break;
311 		case 'o':
312 			outfile = optarg;
313 			break;
314 		case 's':
315 			sflag = 1;
316 			break;
317 		case 'w':
318 			wflag = strtonum(optarg, WIDTH_MIN,
319 			    INT_MAX, &errstr);
320 			if (errstr)
321 				errx(2, "width is %s: %s", errstr, optarg);
322 			break;
323 		case DIFF_PID:
324 			ppid = strtonum(optarg, 0, INT_MAX, &errstr);
325 			if (errstr)
326 				errx(2, "diff pid value is %s: %s", errstr, optarg);
327 			break;
328 		case HELP_OPT:
329 			for (i = 0; help_msg[i] != NULL; i++)
330 				printf("%s\n", help_msg[i]);
331 			exit(0);
332 			break;
333 		default:
334 			usage();
335 			break;
336 		}
337 	}
338 
339 	/* no single switches were used */
340 	if (strcmp(diffargv[1], "-") == 0 ) {
341 		for ( i = 1; i < argc-1; i++) {
342 			diffargv[i] = diffargv[i+1];
343 		}
344 		diffargv[diffargc-1] = NULL;
345 		diffargc--;
346 	}
347 
348 	argc -= optind;
349 	argv += optind;
350 
351 	if (argc != 2)
352 		usage();
353 
354 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
355 		err(2, "could not open: %s", optarg);
356 
357 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
358 		tmpdir = _PATH_TMP;
359 
360 	filename1 = argv[0];
361 	filename2 = argv[1];
362 
363 	/*
364 	 * Create temporary files for diff and sdiff to share if file1
365 	 * or file2 are not regular files.  This allows sdiff and diff
366 	 * to read the same inputs if one or both inputs are stdin.
367 	 *
368 	 * If any temporary files were created, their names would be
369 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
370 	 */
371 	tmp1 = tmp2 = NULL;
372 	/* file1 and file2 are the same, so copy to same temp file. */
373 	if (strcmp(filename1, filename2) == 0) {
374 		if ((tmp1 = mktmpcpy(filename1)))
375 			filename1 = filename2 = tmp1;
376 	/* Copy file1 and file2 into separate temp files. */
377 	} else {
378 		if ((tmp1 = mktmpcpy(filename1)))
379 			filename1 = tmp1;
380 		if ((tmp2 = mktmpcpy(filename2)))
381 			filename2 = tmp2;
382 	}
383 
384 	diffargv[diffargc++] = filename1;
385 	diffargv[diffargc++] = filename2;
386 	/* Add NULL to end of array to indicate end of array. */
387 	diffargv[diffargc++] = NULL;
388 
389 	/* Subtract column divider and divide by two. */
390 	width = (wflag - 3) / 2;
391 	/* Make sure line_width can fit in size_t. */
392 	if (width > (SIZE_MAX - 3) / 2)
393 		errx(2, "width is too large: %zu", width);
394 	line_width = width * 2 + 3;
395 
396 	if (ppid == -1 ) {
397 		if (pipe(fd))
398 			err(2, "pipe");
399 
400 		switch (pid = fork()) {
401 		case 0:
402 			/* child */
403 			/* We don't read from the pipe. */
404 			close(fd[0]);
405 			if (dup2(fd[1], STDOUT_FILENO) == -1)
406 				err(2, "child could not duplicate descriptor");
407 			/* Free unused descriptor. */
408 			close(fd[1]);
409 			execvp(diffprog, diffargv);
410 			err(2, "could not execute diff: %s", diffprog);
411 			break;
412 		case -1:
413 			err(2, "could not fork");
414 			break;
415 		}
416 
417 		/* parent */
418 		/* We don't write to the pipe. */
419 		close(fd[1]);
420 
421 		/* Open pipe to diff command. */
422 		if ((diffpipe = fdopen(fd[0], "r")) == NULL)
423 			err(2, "could not open diff pipe");
424 	}
425 	if ((file1 = fopen(filename1, "r")) == NULL)
426 		err(2, "could not open %s", filename1);
427 	if ((file2 = fopen(filename2, "r")) == NULL)
428 		err(2, "could not open %s", filename2);
429 	if (!istextfile(file1) || !istextfile(file2)) {
430 		/* Close open files and pipe, delete temps */
431 		fclose(file1);
432 		fclose(file2);
433 		if (diffpipe != NULL)
434 			fclose(diffpipe);
435 		if (tmp1)
436 			if (unlink(tmp1))
437 				warn("Error deleting %s.", tmp1);
438 		if (tmp2)
439 			if (unlink(tmp2))
440 				warn("Error deleting %s.", tmp2);
441 		free(tmp1);
442 		free(tmp2);
443 		binexec(diffprog, filename1, filename2);
444 	}
445 	/* Line numbers start at one. */
446 	file1ln = file2ln = 1;
447 
448 	/* Read and parse diff output. */
449 	while (parsecmd(diffpipe, file1, file2) != EOF)
450 		;
451 	fclose(diffpipe);
452 
453 	/* Wait for diff to exit. */
454 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
455 	    WEXITSTATUS(status) >= 2)
456 		err(2, "diff exited abnormally.");
457 
458 	/* Delete and free unneeded temporary files. */
459 	if (tmp1)
460 		if (unlink(tmp1))
461 			warn("Error deleting %s.", tmp1);
462 	if (tmp2)
463 		if (unlink(tmp2))
464 			warn("Error deleting %s.", tmp2);
465 	free(tmp1);
466 	free(tmp2);
467 	filename1 = filename2 = tmp1 = tmp2 = NULL;
468 
469 	/* No more diffs, so print common lines. */
470 	if (lflag)
471 		while ((s1 = xfgets(file1)))
472 			enqueue(s1, ' ', NULL);
473 	else
474 		for (;;) {
475 			s1 = xfgets(file1);
476 			s2 = xfgets(file2);
477 			if (s1 || s2)
478 				enqueue(s1, ' ', s2);
479 			else
480 				break;
481 		}
482 	fclose(file1);
483 	fclose(file2);
484 	/* Process unmodified lines. */
485 	processq();
486 
487 	/* Return diff exit status. */
488 	return (WEXITSTATUS(status));
489 }
490 
/*
 * When sdiff/zsdiff detects a binary file as input, executes them with
 * diff/zdiff to maintain the same behavior as GNU sdiff with binary input.
 *
 * Replaces the current process with `diffprog f1 f2'.  The program is
 * looked up with execvp(), the same call main() uses to start diff.
 * Never returns: if the exec fails the process exits with status 1.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = {diffprog, f1, f2, (char *) 0};

	execvp(diffprog, args);

	/*
	 * Only reached when the exec failed.  err() appends the reason
	 * taken from errno and a newline on its own.
	 */
	err(1, "could not execute diff: %s", diffprog);
}
505 
506 /*
507  * Checks whether a file appears to be a text file.
508  */
509 static int
510 istextfile(FILE *f)
511 {
512 	int	ch, i;
513 
514 	if (f == NULL)
515 		return (1);
516 	rewind(f);
517 	for (i = 0; i <= MAX_CHECK; i++) {
518 		ch = fgetc(f);
519 		if (ch == '\0') {
520 			rewind(f);
521 			return (0);
522 		}
523 		if (ch == EOF)
524 			break;
525 	}
526 	rewind(f);
527 	return (1);
528 }
529 
/*
 * Writes one column (left or right) of a line to standard output.
 *
 * s is the text, *col the current screen column and col_max the first
 * column that must not be written to.  Output stops at the end of s,
 * when *col reaches col_max, or in front of a tab that would carry the
 * cursor beyond col_max.  Tabs advance to the next multiple of eight.
 * *col is updated for every character written.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;
	size_t tabstop;

	for (cp = s; *cp != '\0' && *col < col_max; cp++) {
		if (*cp == '\t') {
			/* Give up if computing the tab stop would overflow. */
			if (*col > SIZE_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* A tab that does not fit is not printed at all. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else
			(*col)++;

		putchar(*cp);
	}
}
569 
570 /*
571  * Prompts user to either choose between two strings or edit one, both,
572  * or neither.
573  */
574 static void
575 prompt(const char *s1, const char *s2)
576 {
577 	char *cmd;
578 
579 	/* Print command prompt. */
580 	putchar('%');
581 
582 	/* Get user input. */
583 	for (; (cmd = xfgets(stdin)); free(cmd)) {
584 		const char *p;
585 
586 		/* Skip leading whitespace. */
587 		for (p = cmd; isspace(*p); ++p)
588 			;
589 		switch (*p) {
590 		case 'e':
591 			/* Skip `e'. */
592 			++p;
593 			if (eparse(p, s1, s2) == -1)
594 				goto USAGE;
595 			break;
596 		case 'l':
597 		case '1':
598 			/* Choose left column as-is. */
599 			if (s1 != NULL)
600 				fprintf(outfp, "%s\n", s1);
601 			/* End of command parsing. */
602 			break;
603 		case 'q':
604 			goto QUIT;
605 		case 'r':
606 		case '2':
607 			/* Choose right column as-is. */
608 			if (s2 != NULL)
609 				fprintf(outfp, "%s\n", s2);
610 			/* End of command parsing. */
611 			break;
612 		case 's':
613 			sflag = 1;
614 			goto PROMPT;
615 		case 'v':
616 			sflag = 0;
617 			/* FALLTHROUGH */
618 		default:
619 			/* Interactive usage help. */
620 USAGE:
621 			int_usage();
622 PROMPT:
623 			putchar('%');
624 
625 			/* Prompt user again. */
626 			continue;
627 		}
628 		free(cmd);
629 		return;
630 	}
631 
632 	/*
633 	 * If there was no error, we received an EOF from stdin, so we
634 	 * should quit.
635 	 */
636 QUIT:
637 	fclose(outfp);
638 	exit(0);
639 }
640 
641 /*
642  * Takes two strings, separated by a column divider.  NULL strings are
643  * treated as empty columns.  If the divider is the ` ' character, the
644  * second column is not printed (-l flag).  In this case, the second
645  * string must be NULL.  When the second column is NULL, the divider
646  * does not print the trailing space following the divider character.
647  *
648  * Takes into account that tabs can take multiple columns.
649  */
650 static void
651 println(const char *s1, const char div, const char *s2)
652 {
653 	size_t col;
654 
655 	/* Print first column.  Skips if s1 == NULL. */
656 	col = 0;
657 	if (s1) {
658 		/* Skip angle bracket and space. */
659 		printcol(s1, &col, width);
660 
661 	}
662 
663 	/* Otherwise, we pad this column up to width. */
664 	for (; col < width; ++col)
665 		putchar(' ');
666 
667 	/* Only print left column. */
668 	if (div == ' ' && !s2) {
669 		printf(" (\n");
670 		return;
671 	}
672 
673 	/*
674 	 * Print column divider.  If there is no second column, we don't
675 	 * need to add the space for padding.
676 	 */
677 	if (!s2) {
678 		printf(" %c\n", div);
679 		return;
680 	}
681 	printf(" %c ", div);
682 	col += 3;
683 
684 	/* Skip angle bracket and space. */
685 	printcol(s2, &col, line_width);
686 
687 	putchar('\n');
688 }
689 
/*
 * Reads a line from file and returns as a string.  If EOF is reached,
 * NULL is returned.  The returned string must be freed afterwards.
 *
 * A trailing newline is removed from the returned string.  A read
 * error terminates the program with status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t l;
	char *s = NULL;

	clearerr(file);

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may allocate a buffer even when it fails. */
		free(s);
		return (NULL);
	}

	/* Strip the line terminator, if there is one. */
	if (l > 0 && s[l - 1] == '\n')
		s[l - 1] = '\0';

	return (s);
}
716 
717 /*
718  * Parse ed commands from diffpipe and print lines from file1 (lines
719  * to change or delete) or file2 (lines to add or change).
720  * Returns EOF or 0.
721  */
722 static int
723 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
724 {
725 	size_t file1start, file1end, file2start, file2end, n;
726 	/* ed command line and pointer to characters in line */
727 	char *line, *p, *q;
728 	const char *errstr;
729 	char c, cmd;
730 
731 	/* Read ed command. */
732 	if (!(line = xfgets(diffpipe)))
733 		return (EOF);
734 
735 	p = line;
736 	/* Go to character after line number. */
737 	while (isdigit(*p))
738 		++p;
739 	c = *p;
740 	*p++ = 0;
741 	file1start = strtonum(line, 0, INT_MAX, &errstr);
742 	if (errstr)
743 		errx(2, "file1 start is %s: %s", errstr, line);
744 
745 	/* A range is specified for file1. */
746 	if (c == ',') {
747 		q = p;
748 		/* Go to character after file2end. */
749 		while (isdigit(*p))
750 			++p;
751 		c = *p;
752 		*p++ = 0;
753 		file1end = strtonum(q, 0, INT_MAX, &errstr);
754 		if (errstr)
755 			errx(2, "file1 end is %s: %s", errstr, line);
756 		if (file1start > file1end)
757 			errx(2, "invalid line range in file1: %s", line);
758 	} else
759 		file1end = file1start;
760 
761 	cmd = c;
762 	/* Check that cmd is valid. */
763 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
764 		errx(2, "ed command not recognized: %c: %s", cmd, line);
765 
766 	q = p;
767 	/* Go to character after line number. */
768 	while (isdigit(*p))
769 		++p;
770 	c = *p;
771 	*p++ = 0;
772 	file2start = strtonum(q, 0, INT_MAX, &errstr);
773 	if (errstr)
774 		errx(2, "file2 start is %s: %s", errstr, line);
775 
776 	/*
777 	 * There should either be a comma signifying a second line
778 	 * number or the line should just end here.
779 	 */
780 	if (c != ',' && c != '\0')
781 		errx(2, "invalid line range in file2: %c: %s", c, line);
782 
783 	if (c == ',') {
784 
785 		file2end = strtonum(p, 0, INT_MAX, &errstr);
786 		if (errstr)
787 			errx(2, "file2 end is %s: %s", errstr, line);
788 		if (file2start >= file2end)
789 			errx(2, "invalid line range in file2: %s", line);
790 	} else
791 		file2end = file2start;
792 
793 	/* Appends happen _after_ stated line. */
794 	if (cmd == 'a') {
795 		if (file1start != file1end)
796 			errx(2, "append cannot have a file1 range: %s",
797 			    line);
798 		if (file1start == SIZE_MAX)
799 			errx(2, "file1 line range too high: %s", line);
800 		file1start = ++file1end;
801 	}
802 	/*
803 	 * I'm not sure what the deal is with the line numbers for
804 	 * deletes, though.
805 	 */
806 	else if (cmd == 'd') {
807 		if (file2start != file2end)
808 			errx(2, "delete cannot have a file2 range: %s",
809 			    line);
810 		if (file2start == SIZE_MAX)
811 			errx(2, "file2 line range too high: %s", line);
812 		file2start = ++file2end;
813 	}
814 
815 	/*
816 	 * Continue reading file1 and file2 until we reach line numbers
817 	 * specified by diff.  Should only happen with -I flag.
818 	 */
819 	for (; file1ln < file1start && file2ln < file2start;
820 	    ++file1ln, ++file2ln) {
821 		char *s1, *s2;
822 
823 		if (!(s1 = xfgets(file1)))
824 			errx(2, "file1 shorter than expected");
825 		if (!(s2 = xfgets(file2)))
826 			errx(2, "file2 shorter than expected");
827 
828 		/* If the -l flag was specified, print only left column. */
829 		if (lflag) {
830 			free(s2);
831 			/*
832 			 * XXX - If -l and -I are both specified, all
833 			 * unchanged or ignored lines are shown with a
834 			 * `(' divider.  This matches GNU sdiff, but I
835 			 * believe it is a bug.  Just check out:
836 			 * gsdiff -l -I '^$' samefile samefile.
837 			 */
838 			if (Iflag)
839 				enqueue(s1, '(', NULL);
840 			else
841 				enqueue(s1, ' ', NULL);
842 		} else
843 			enqueue(s1, ' ', s2);
844 	}
845 	/* Ignore deleted lines. */
846 	for (; file1ln < file1start; ++file1ln) {
847 		char *s;
848 
849 		if (!(s = xfgets(file1)))
850 			errx(2, "file1 shorter than expected");
851 
852 		enqueue(s, '(', NULL);
853 	}
854 	/* Ignore added lines. */
855 	for (; file2ln < file2start; ++file2ln) {
856 		char *s;
857 
858 		if (!(s = xfgets(file2)))
859 			errx(2, "file2 shorter than expected");
860 
861 		/* If -l flag was given, don't print right column. */
862 		if (lflag)
863 			free(s);
864 		else
865 			enqueue(NULL, ')', s);
866 	}
867 
868 	/* Process unmodified or skipped lines. */
869 	processq();
870 
871 	switch (cmd) {
872 	case 'a':
873 		printa(file2, file2end);
874 		n = file2end - file2start + 1;
875 		break;
876 	case 'c':
877 		printc(file1, file1end, file2, file2end);
878 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
879 		break;
880 	case 'd':
881 		printd(file1, file1end);
882 		n = file1end - file1start + 1;
883 		break;
884 	default:
885 		errx(2, "invalid diff command: %c: %s", cmd, line);
886 	}
887 	free(line);
888 
889 	/* Skip to next ed line. */
890 	while (n--) {
891 		if (!(line = xfgets(diffpipe)))
892 			errx(2, "diff ended early");
893 		free(line);
894 	}
895 
896 	return (0);
897 }
898 
899 /*
900  * Queues up a diff line.
901  */
902 static void
903 enqueue(char *left, char div, char *right)
904 {
905 	struct diffline *diffp;
906 
907 	if (!(diffp = malloc(sizeof(struct diffline))))
908 		err(2, "enqueue");
909 	diffp->left = left;
910 	diffp->div = div;
911 	diffp->right = right;
912 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
913 }
914 
915 /*
916  * Free a diffline structure and its elements.
917  */
918 static void
919 freediff(struct diffline *diffp)
920 {
921 
922 	free(diffp->left);
923 	free(diffp->right);
924 	free(diffp);
925 }
926 
927 /*
928  * Append second string into first.  Repeated appends to the same string
929  * are cached, making this an O(n) function, where n = strlen(append).
930  */
931 static void
932 astrcat(char **s, const char *append)
933 {
934 	/* Length of string in previous run. */
935 	static size_t offset = 0;
936 	size_t newsiz;
937 	/*
938 	 * String from previous run.  Compared to *s to see if we are
939 	 * dealing with the same string.  If so, we can use offset.
940 	 */
941 	static const char *oldstr = NULL;
942 	char *newstr;
943 
944 	/*
945 	 * First string is NULL, so just copy append.
946 	 */
947 	if (!*s) {
948 		if (!(*s = strdup(append)))
949 			err(2, "astrcat");
950 
951 		/* Keep track of string. */
952 		offset = strlen(*s);
953 		oldstr = *s;
954 
955 		return;
956 	}
957 
958 	/*
959 	 * *s is a string so concatenate.
960 	 */
961 
962 	/* Did we process the same string in the last run? */
963 	/*
964 	 * If this is a different string from the one we just processed
965 	 * cache new string.
966 	 */
967 	if (oldstr != *s) {
968 		offset = strlen(*s);
969 		oldstr = *s;
970 	}
971 
972 	/* Size = strlen(*s) + \n + strlen(append) + '\0'. */
973 	newsiz = offset + 1 + strlen(append) + 1;
974 
975 	/* Resize *s to fit new string. */
976 	newstr = realloc(*s, newsiz);
977 	if (newstr == NULL)
978 		err(2, "astrcat");
979 	*s = newstr;
980 
981 	/* *s + offset should be end of string. */
982 	/* Concatenate. */
983 	strlcpy(*s + offset, "\n", newsiz - offset);
984 	strlcat(*s + offset, append, newsiz - offset);
985 
986 	/* New string length should be exactly newsiz - 1 characters. */
987 	/* Store generated string's values. */
988 	offset = newsiz - 1;
989 	oldstr = *s;
990 }
991 
992 /*
993  * Process diff set queue, printing, prompting, and saving each diff
994  * line stored in queue.
995  */
996 static void
997 processq(void)
998 {
999 	struct diffline *diffp;
1000 	char divc, *left, *right;
1001 
1002 	/* Don't process empty queue. */
1003 	if (STAILQ_EMPTY(&diffhead))
1004 		return;
1005 
1006 	/* Remember the divider. */
1007 	divc = STAILQ_FIRST(&diffhead)->div;
1008 
1009 	left = NULL;
1010 	right = NULL;
1011 	/*
1012 	 * Go through set of diffs, concatenating each line in left or
1013 	 * right column into two long strings, `left' and `right'.
1014 	 */
1015 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1016 		/*
1017 		 * Print changed lines if -s was given,
1018 		 * print all lines if -s was not given.
1019 		 */
1020 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1021 		    diffp->div == '>')
1022 			println(diffp->left, diffp->div, diffp->right);
1023 
1024 		/* Append new lines to diff set. */
1025 		if (diffp->left)
1026 			astrcat(&left, diffp->left);
1027 		if (diffp->right)
1028 			astrcat(&right, diffp->right);
1029 	}
1030 
1031 	/* Empty queue and free each diff line and its elements. */
1032 	while (!STAILQ_EMPTY(&diffhead)) {
1033 		diffp = STAILQ_FIRST(&diffhead);
1034 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1035 		freediff(diffp);
1036 	}
1037 
1038 	/* Write to outfp, prompting user if lines are different. */
1039 	if (outfp)
1040 		switch (divc) {
1041 		case ' ': case '(': case ')':
1042 			fprintf(outfp, "%s\n", left);
1043 			break;
1044 		case '|': case '<': case '>':
1045 			prompt(left, right);
1046 			break;
1047 		default:
1048 			errx(2, "invalid divider: %c", divc);
1049 		}
1050 
1051 	/* Free left and right. */
1052 	free(left);
1053 	free(right);
1054 }
1055 
1056 /*
1057  * Print lines following an (a)ppend command.
1058  */
1059 static void
1060 printa(FILE *file, size_t line2)
1061 {
1062 	char *line;
1063 
1064 	for (; file2ln <= line2; ++file2ln) {
1065 		if (!(line = xfgets(file)))
1066 			errx(2, "append ended early");
1067 		enqueue(NULL, '>', line);
1068 	}
1069 	processq();
1070 }
1071 
1072 /*
1073  * Print lines following a (c)hange command, from file1ln to file1end
1074  * and from file2ln to file2end.
1075  */
1076 static void
1077 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1078 {
1079 	struct fileline {
1080 		STAILQ_ENTRY(fileline)	 fileentries;
1081 		char			*line;
1082 	};
1083 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1084 
1085 	/* Read lines to be deleted. */
1086 	for (; file1ln <= file1end; ++file1ln) {
1087 		struct fileline *linep;
1088 		char *line1;
1089 
1090 		/* Read lines from both. */
1091 		if (!(line1 = xfgets(file1)))
1092 			errx(2, "error reading file1 in delete in change");
1093 
1094 		/* Add to delete queue. */
1095 		if (!(linep = malloc(sizeof(struct fileline))))
1096 			err(2, "printc");
1097 		linep->line = line1;
1098 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1099 	}
1100 
1101 	/* Process changed lines.. */
1102 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1103 	    ++file2ln) {
1104 		struct fileline *del;
1105 		char *add;
1106 
1107 		/* Get add line. */
1108 		if (!(add = xfgets(file2)))
1109 			errx(2, "error reading add in change");
1110 
1111 		del = STAILQ_FIRST(&delqhead);
1112 		enqueue(del->line, '|', add);
1113 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1114 		/*
1115 		 * Free fileline structure but not its elements since
1116 		 * they are queued up.
1117 		 */
1118 		free(del);
1119 	}
1120 	processq();
1121 
1122 	/* Process remaining lines to add. */
1123 	for (; file2ln <= file2end; ++file2ln) {
1124 		char *add;
1125 
1126 		/* Get add line. */
1127 		if (!(add = xfgets(file2)))
1128 			errx(2, "error reading add in change");
1129 
1130 		enqueue(NULL, '>', add);
1131 	}
1132 	processq();
1133 
1134 	/* Process remaining lines to delete. */
1135 	while (!STAILQ_EMPTY(&delqhead)) {
1136 		struct fileline *filep;
1137 
1138 		filep = STAILQ_FIRST(&delqhead);
1139 		enqueue(filep->line, '<', NULL);
1140 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1141 		free(filep);
1142 	}
1143 	processq();
1144 }
1145 
1146 /*
1147  * Print deleted lines from file, from file1ln to file1end.
1148  */
1149 static void
1150 printd(FILE *file1, size_t file1end)
1151 {
1152 	char *line1;
1153 
1154 	/* Print out lines file1ln to line2. */
1155 	for (; file1ln <= file1end; ++file1ln) {
1156 		if (!(line1 = xfgets(file1)))
1157 			errx(2, "file1 ended early in delete");
1158 		enqueue(line1, '<', NULL);
1159 	}
1160 	processq();
1161 }
1162 
/*
 * Prints the list of commands understood at the interactive merge
 * prompt to standard output.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1180 
/*
 * Prints the command line synopsis to standard error and exits with
 * status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1190