xref: /freebsd/usr.bin/sdiff/sdiff.c (revision 1c05a6ea6b849ff95e539c31adea887c644a6a01)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 __FBSDID("$FreeBSD$");
10 
11 #include <sys/param.h>
12 #include <sys/queue.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 
17 #include <ctype.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "extern.h"
31 
32 #define DIFF_PATH	"/usr/bin/diff"
33 
34 #define WIDTH 126
35 /*
36  * Each column must be at least one character wide, plus three
37  * characters between the columns (space, [<|>], space).
38  */
39 #define WIDTH_MIN 5
40 
/* Approximate number of leading bytes istextfile() scans for a NUL byte. */
42 #define MAX_CHECK 768
43 
/*
 * A single diff line: one queued row of side-by-side output.  Either
 * column pointer may be NULL (println() treats that as an empty column);
 * freediff() frees both strings together with the entry.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* link in the diffhead queue */
	char	*left;				/* left column text, or NULL */
	char	 div;				/* divider printed between the columns */
	char	*right;				/* right column text, or NULL */
};
51 
52 static void astrcat(char **, const char *);
53 static void enqueue(char *, char, char *);
54 static char *mktmpcpy(const char *);
55 static int istextfile(FILE *);
56 static void binexec(char *, char *, char *) __dead2;
57 static void freediff(struct diffline *);
58 static void int_usage(void);
59 static int parsecmd(FILE *, FILE *, FILE *);
60 static void printa(FILE *, size_t);
61 static void printc(FILE *, size_t, FILE *, size_t);
62 static void printcol(const char *, size_t *, const size_t);
63 static void printd(FILE *, size_t);
64 static void println(const char *, const char, const char *);
65 static void processq(void);
66 static void prompt(const char *, const char *);
67 static void usage(void) __dead2;
68 static char *xfgets(FILE *);
69 
/* Queue of pending output rows: filled by enqueue(), drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* line number of file1 and file2 */
static int Iflag = 0;	/* set when -I was given (regexp itself goes to diff) */
static int	lflag;		/* print only left column for identical lines */
static int	sflag;		/* skip identical lines */
FILE *outfp;		/* file to save changes to (NULL unless -o was given) */
const char *tmpdir;	/* TMPDIR or /tmp */
79 
/*
 * Return values for long options that have no single-letter equivalent.
 * They start above CHAR_MAX so they cannot collide with option characters.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};

/* Long option table handed to getopt_long() in main(). */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/* Options processed by diff. */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* Terminator required by getopt_long(). */
	{ NULL,				0,			NULL,	'\0'}
};
118 
/*
 * Text printed by --help: one array entry per output line, terminated by
 * NULL.  The long option names shown here must match the longopts table.
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t--speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
142 
143 /*
144  * Create temporary file if source_file is not a regular file.
145  * Returns temporary file name if one was malloced, NULL if unnecessary.
146  */
147 static char *
148 mktmpcpy(const char *source_file)
149 {
150 	struct stat sb;
151 	ssize_t rcount;
152 	int ifd, ofd;
153 	u_char buf[BUFSIZ];
154 	char *target_file;
155 
156 	/* Open input and output. */
157 	ifd = open(source_file, O_RDONLY, 0);
158 	/* File was opened successfully. */
159 	if (ifd != -1) {
160 		if (fstat(ifd, &sb) == -1)
161 			err(2, "error getting file status from %s", source_file);
162 
163 		/* Regular file. */
164 		if (S_ISREG(sb.st_mode)) {
165 			close(ifd);
166 			return (NULL);
167 		}
168 	} else {
169 		/* If ``-'' does not exist the user meant stdin. */
170 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
171 			ifd = STDIN_FILENO;
172 		else
173 			err(2, "error opening %s", source_file);
174 	}
175 
176 	/* Not a regular file, so copy input into temporary file. */
177 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
178 		err(2, "asprintf");
179 	if ((ofd = mkstemp(target_file)) == -1) {
180 		warn("error opening %s", target_file);
181 		goto FAIL;
182 	}
183 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
184 	    rcount != 0) {
185 		ssize_t wcount;
186 
187 		wcount = write(ofd, buf, (size_t)rcount);
188 		if (-1 == wcount || rcount != wcount) {
189 			warn("error writing to %s", target_file);
190 			goto FAIL;
191 		}
192 	}
193 	if (rcount == -1) {
194 		warn("error reading from %s", source_file);
195 		goto FAIL;
196 	}
197 
198 	close(ifd);
199 	close(ofd);
200 
201 	return (target_file);
202 
203 FAIL:
204 	unlink(target_file);
205 	exit(2);
206 }
207 
208 int
209 main(int argc, char **argv)
210 {
211 	FILE *diffpipe=NULL, *file1, *file2;
212 	size_t diffargc = 0, wflag = WIDTH;
213 	int ch, fd[2] = {-1}, status;
214 	pid_t pid=0;
215 	const char *outfile = NULL;
216 	char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
217 	     *tmp1, *tmp2, *s1, *s2;
218 	int i;
219 
220 	/*
221 	 * Process diff flags.
222 	 */
223 	/*
224 	 * Allocate memory for diff arguments and NULL.
225 	 * Each flag has at most one argument, so doubling argc gives an
226 	 * upper limit of how many diff args can be passed.  argv[0],
227 	 * file1, and file2 won't have arguments so doubling them will
228 	 * waste some memory; however we need an extra space for the
229 	 * NULL at the end, so it sort of works out.
230 	 */
231 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
232 		err(2, "main");
233 
234 	/* Add first argument, the program name. */
235 	diffargv[diffargc++] = diffprog;
236 
237 	/* create a dynamic string for merging single-switch options */
238 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
239 		err(2, "main");
240 
241 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
242 	    longopts, NULL)) != -1) {
243 		const char *errstr;
244 
245 		switch (ch) {
246 		/* only compatible --long-name-form with diff */
247 		case FCASE_IGNORE_OPT:
248 		case FCASE_SENSITIVE_OPT:
249 		case STRIPCR_OPT:
250 		case TSIZE_OPT:
251 		case 'S':
252 		break;
253 		/* combine no-arg single switches */
254 		case 'a':
255 		case 'B':
256 		case 'b':
257 		case 'd':
258 		case 'E':
259 		case 'i':
260 		case 't':
261 		case 'W':
262 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
263 			/*
264 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
265 			 */
266 			if (ch == 'W')
267 				sprintf(diffargv[1], "%sw", diffargv[1]);
268 			else
269 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
270 			break;
271 		case 'H':
272 			diffargv[diffargc++] = "--speed-large-files";
273 			break;
274 		case DIFFPROG_OPT:
275 			diffargv[0] = diffprog = optarg;
276 			break;
277 		case 'I':
278 			Iflag = 1;
279 			diffargv[diffargc++] = "-I";
280 			diffargv[diffargc++] = optarg;
281 			break;
282 		case 'l':
283 			lflag = 1;
284 			break;
285 		case 'o':
286 			outfile = optarg;
287 			break;
288 		case 's':
289 			sflag = 1;
290 			break;
291 		case 'w':
292 			wflag = strtonum(optarg, WIDTH_MIN,
293 			    INT_MAX, &errstr);
294 			if (errstr)
295 				errx(2, "width is %s: %s", errstr, optarg);
296 			break;
297 		case HELP_OPT:
298 			for (i = 0; help_msg[i] != NULL; i++)
299 				printf("%s\n", help_msg[i]);
300 			exit(0);
301 			break;
302 		default:
303 			usage();
304 			break;
305 		}
306 	}
307 
308 	/* no single switches were used */
309 	if (strcmp(diffargv[1], "-") == 0 ) {
310 		for ( i = 1; i < argc-1; i++) {
311 			diffargv[i] = diffargv[i+1];
312 		}
313 		diffargv[diffargc-1] = NULL;
314 		diffargc--;
315 	}
316 
317 	argc -= optind;
318 	argv += optind;
319 
320 	if (argc != 2)
321 		usage();
322 
323 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
324 		err(2, "could not open: %s", optarg);
325 
326 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
327 		tmpdir = _PATH_TMP;
328 
329 	filename1 = argv[0];
330 	filename2 = argv[1];
331 
332 	/*
333 	 * Create temporary files for diff and sdiff to share if file1
334 	 * or file2 are not regular files.  This allows sdiff and diff
335 	 * to read the same inputs if one or both inputs are stdin.
336 	 *
337 	 * If any temporary files were created, their names would be
338 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
339 	 */
340 	tmp1 = tmp2 = NULL;
341 	/* file1 and file2 are the same, so copy to same temp file. */
342 	if (strcmp(filename1, filename2) == 0) {
343 		if ((tmp1 = mktmpcpy(filename1)))
344 			filename1 = filename2 = tmp1;
345 	/* Copy file1 and file2 into separate temp files. */
346 	} else {
347 		if ((tmp1 = mktmpcpy(filename1)))
348 			filename1 = tmp1;
349 		if ((tmp2 = mktmpcpy(filename2)))
350 			filename2 = tmp2;
351 	}
352 
353 	diffargv[diffargc++] = filename1;
354 	diffargv[diffargc++] = filename2;
355 	/* Add NULL to end of array to indicate end of array. */
356 	diffargv[diffargc++] = NULL;
357 
358 	/* Subtract column divider and divide by two. */
359 	width = (wflag - 3) / 2;
360 	/* Make sure line_width can fit in size_t. */
361 	if (width > (SIZE_MAX - 3) / 2)
362 		errx(2, "width is too large: %zu", width);
363 	line_width = width * 2 + 3;
364 
365 	if (pipe(fd))
366 		err(2, "pipe");
367 
368 	switch (pid = fork()) {
369 	case 0:
370 		/* child */
371 		/* We don't read from the pipe. */
372 		close(fd[0]);
373 		if (dup2(fd[1], STDOUT_FILENO) == -1)
374 			err(2, "child could not duplicate descriptor");
375 		/* Free unused descriptor. */
376 		close(fd[1]);
377 		execvp(diffprog, diffargv);
378 		err(2, "could not execute diff: %s", diffprog);
379 		break;
380 	case -1:
381 		err(2, "could not fork");
382 		break;
383 	}
384 
385 	/* parent */
386 	/* We don't write to the pipe. */
387 	close(fd[1]);
388 
389 	/* Open pipe to diff command. */
390 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
391 		err(2, "could not open diff pipe");
392 
393 	if ((file1 = fopen(filename1, "r")) == NULL)
394 		err(2, "could not open %s", filename1);
395 	if ((file2 = fopen(filename2, "r")) == NULL)
396 		err(2, "could not open %s", filename2);
397 	if (!istextfile(file1) || !istextfile(file2)) {
398 		/* Close open files and pipe, delete temps */
399 		fclose(file1);
400 		fclose(file2);
401 		if (diffpipe != NULL)
402 			fclose(diffpipe);
403 		if (tmp1)
404 			if (unlink(tmp1))
405 				warn("Error deleting %s.", tmp1);
406 		if (tmp2)
407 			if (unlink(tmp2))
408 				warn("Error deleting %s.", tmp2);
409 		free(tmp1);
410 		free(tmp2);
411 		binexec(diffprog, filename1, filename2);
412 	}
413 	/* Line numbers start at one. */
414 	file1ln = file2ln = 1;
415 
416 	/* Read and parse diff output. */
417 	while (parsecmd(diffpipe, file1, file2) != EOF)
418 		;
419 	fclose(diffpipe);
420 
421 	/* Wait for diff to exit. */
422 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
423 	    WEXITSTATUS(status) >= 2)
424 		err(2, "diff exited abnormally.");
425 
426 	/* Delete and free unneeded temporary files. */
427 	if (tmp1)
428 		if (unlink(tmp1))
429 			warn("Error deleting %s.", tmp1);
430 	if (tmp2)
431 		if (unlink(tmp2))
432 			warn("Error deleting %s.", tmp2);
433 	free(tmp1);
434 	free(tmp2);
435 	filename1 = filename2 = tmp1 = tmp2 = NULL;
436 
437 	/* No more diffs, so print common lines. */
438 	if (lflag)
439 		while ((s1 = xfgets(file1)))
440 			enqueue(s1, ' ', NULL);
441 	else
442 		for (;;) {
443 			s1 = xfgets(file1);
444 			s2 = xfgets(file2);
445 			if (s1 || s2)
446 				enqueue(s1, ' ', s2);
447 			else
448 				break;
449 		}
450 	fclose(file1);
451 	fclose(file2);
452 	/* Process unmodified lines. */
453 	processq();
454 
455 	/* Return diff exit status. */
456 	return (WEXITSTATUS(status));
457 }
458 
/*
 * When sdiff detects a binary file as input, executes them with
 * diff to maintain the same behavior as GNU sdiff with binary input.
 *
 * Replaces the current process with "diffprog f1 f2" and therefore never
 * returns.  diffprog is looked up with execvp(), the same way main()
 * starts diff, so a bare program name given via --diff-program works
 * here too.  If the exec fails, the reason is printed and the process
 * exits with status 2, like every other failure in sdiff (status 1 would
 * read as "files differ").
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = {diffprog, f1, f2, (char *) 0};

	execvp(diffprog, args);

	/* Only reached when execvp() failed; errno says why. */
	err(2, "could not execute diff process");
}
473 
474 /*
475  * Checks whether a file appears to be a text file.
476  */
477 static int
478 istextfile(FILE *f)
479 {
480 	int	ch, i;
481 
482 	if (f == NULL)
483 		return (1);
484 	rewind(f);
485 	for (i = 0; i <= MAX_CHECK; i++) {
486 		ch = fgetc(f);
487 		if (ch == '\0') {
488 			rewind(f);
489 			return (0);
490 		}
491 		if (ch == EOF)
492 			break;
493 	}
494 	rewind(f);
495 	return (1);
496 }
497 
/*
 * Write one column (left or right) to stdout.
 *
 * s is the text, *col the screen column the cursor is currently in, and
 * col_max the first column that must not be written to.  Ordinary
 * characters advance *col by one; a tab advances it to the next multiple
 * of eight.  Output stops at the end of s, when *col reaches col_max, or
 * at a tab that would move past col_max (that tab is not printed).
 * *col is updated as characters are printed.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;
	size_t tabstop;

	for (cp = s; *cp != '\0'; cp++) {
		if (*col >= col_max)
			break;

		if (*cp == '\t') {
			/* Rounding up would overflow: give up. */
			if (*col > SIZE_MAX - 8)
				return;

			/* Next multiple of eight. */
			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit into the column. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
537 
538 /*
539  * Prompts user to either choose between two strings or edit one, both,
540  * or neither.
541  */
542 static void
543 prompt(const char *s1, const char *s2)
544 {
545 	char *cmd;
546 
547 	/* Print command prompt. */
548 	putchar('%');
549 
550 	/* Get user input. */
551 	for (; (cmd = xfgets(stdin)); free(cmd)) {
552 		const char *p;
553 
554 		/* Skip leading whitespace. */
555 		for (p = cmd; isspace(*p); ++p)
556 			;
557 		switch (*p) {
558 		case 'e':
559 			/* Skip `e'. */
560 			++p;
561 			if (eparse(p, s1, s2) == -1)
562 				goto USAGE;
563 			break;
564 		case 'l':
565 		case '1':
566 			/* Choose left column as-is. */
567 			if (s1 != NULL)
568 				fprintf(outfp, "%s\n", s1);
569 			/* End of command parsing. */
570 			break;
571 		case 'q':
572 			goto QUIT;
573 		case 'r':
574 		case '2':
575 			/* Choose right column as-is. */
576 			if (s2 != NULL)
577 				fprintf(outfp, "%s\n", s2);
578 			/* End of command parsing. */
579 			break;
580 		case 's':
581 			sflag = 1;
582 			goto PROMPT;
583 		case 'v':
584 			sflag = 0;
585 			/* FALLTHROUGH */
586 		default:
587 			/* Interactive usage help. */
588 USAGE:
589 			int_usage();
590 PROMPT:
591 			putchar('%');
592 
593 			/* Prompt user again. */
594 			continue;
595 		}
596 		free(cmd);
597 		return;
598 	}
599 
600 	/*
601 	 * If there was no error, we received an EOF from stdin, so we
602 	 * should quit.
603 	 */
604 QUIT:
605 	fclose(outfp);
606 	exit(0);
607 }
608 
609 /*
610  * Takes two strings, separated by a column divider.  NULL strings are
611  * treated as empty columns.  If the divider is the ` ' character, the
612  * second column is not printed (-l flag).  In this case, the second
613  * string must be NULL.  When the second column is NULL, the divider
614  * does not print the trailing space following the divider character.
615  *
616  * Takes into account that tabs can take multiple columns.
617  */
618 static void
619 println(const char *s1, const char div, const char *s2)
620 {
621 	size_t col;
622 
623 	/* Print first column.  Skips if s1 == NULL. */
624 	col = 0;
625 	if (s1) {
626 		/* Skip angle bracket and space. */
627 		printcol(s1, &col, width);
628 
629 	}
630 
631 	/* Otherwise, we pad this column up to width. */
632 	for (; col < width; ++col)
633 		putchar(' ');
634 
635 	/* Only print left column. */
636 	if (div == ' ' && !s2) {
637 		printf(" (\n");
638 		return;
639 	}
640 
641 	/*
642 	 * Print column divider.  If there is no second column, we don't
643 	 * need to add the space for padding.
644 	 */
645 	if (!s2) {
646 		printf(" %c\n", div);
647 		return;
648 	}
649 	printf(" %c ", div);
650 	col += 3;
651 
652 	/* Skip angle bracket and space. */
653 	printcol(s2, &col, line_width);
654 
655 	putchar('\n');
656 }
657 
/*
 * Reads a line from file and returns as a string.  If EOF is reached,
 * NULL is returned.  The returned string must be freed afterwards.
 *
 * The stream's error and EOF indicators are cleared before reading.  A
 * trailing newline, if present, is removed.  A read error is fatal
 * (exit status 2).
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * returned no line; release it so EOF does not leak.
		 */
		free(s);
		return (NULL);
	}

	/* Strip the newline; the last line of a file may lack one. */
	if (l > 0 && s[l-1] == '\n')
		s[l-1] = '\0';

	return (s);
}
684 
685 /*
686  * Parse ed commands from diffpipe and print lines from file1 (lines
687  * to change or delete) or file2 (lines to add or change).
688  * Returns EOF or 0.
689  */
690 static int
691 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
692 {
693 	size_t file1start, file1end, file2start, file2end, n;
694 	/* ed command line and pointer to characters in line */
695 	char *line, *p, *q;
696 	const char *errstr;
697 	char c, cmd;
698 
699 	/* Read ed command. */
700 	if (!(line = xfgets(diffpipe)))
701 		return (EOF);
702 
703 	p = line;
704 	/* Go to character after line number. */
705 	while (isdigit(*p))
706 		++p;
707 	c = *p;
708 	*p++ = 0;
709 	file1start = strtonum(line, 0, INT_MAX, &errstr);
710 	if (errstr)
711 		errx(2, "file1 start is %s: %s", errstr, line);
712 
713 	/* A range is specified for file1. */
714 	if (c == ',') {
715 		q = p;
716 		/* Go to character after file2end. */
717 		while (isdigit(*p))
718 			++p;
719 		c = *p;
720 		*p++ = 0;
721 		file1end = strtonum(q, 0, INT_MAX, &errstr);
722 		if (errstr)
723 			errx(2, "file1 end is %s: %s", errstr, line);
724 		if (file1start > file1end)
725 			errx(2, "invalid line range in file1: %s", line);
726 	} else
727 		file1end = file1start;
728 
729 	cmd = c;
730 	/* Check that cmd is valid. */
731 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
732 		errx(2, "ed command not recognized: %c: %s", cmd, line);
733 
734 	q = p;
735 	/* Go to character after line number. */
736 	while (isdigit(*p))
737 		++p;
738 	c = *p;
739 	*p++ = 0;
740 	file2start = strtonum(q, 0, INT_MAX, &errstr);
741 	if (errstr)
742 		errx(2, "file2 start is %s: %s", errstr, line);
743 
744 	/*
745 	 * There should either be a comma signifying a second line
746 	 * number or the line should just end here.
747 	 */
748 	if (c != ',' && c != '\0')
749 		errx(2, "invalid line range in file2: %c: %s", c, line);
750 
751 	if (c == ',') {
752 
753 		file2end = strtonum(p, 0, INT_MAX, &errstr);
754 		if (errstr)
755 			errx(2, "file2 end is %s: %s", errstr, line);
756 		if (file2start >= file2end)
757 			errx(2, "invalid line range in file2: %s", line);
758 	} else
759 		file2end = file2start;
760 
761 	/* Appends happen _after_ stated line. */
762 	if (cmd == 'a') {
763 		if (file1start != file1end)
764 			errx(2, "append cannot have a file1 range: %s",
765 			    line);
766 		if (file1start == SIZE_MAX)
767 			errx(2, "file1 line range too high: %s", line);
768 		file1start = ++file1end;
769 	}
770 	/*
771 	 * I'm not sure what the deal is with the line numbers for
772 	 * deletes, though.
773 	 */
774 	else if (cmd == 'd') {
775 		if (file2start != file2end)
776 			errx(2, "delete cannot have a file2 range: %s",
777 			    line);
778 		if (file2start == SIZE_MAX)
779 			errx(2, "file2 line range too high: %s", line);
780 		file2start = ++file2end;
781 	}
782 
783 	/*
784 	 * Continue reading file1 and file2 until we reach line numbers
785 	 * specified by diff.  Should only happen with -I flag.
786 	 */
787 	for (; file1ln < file1start && file2ln < file2start;
788 	    ++file1ln, ++file2ln) {
789 		char *s1, *s2;
790 
791 		if (!(s1 = xfgets(file1)))
792 			errx(2, "file1 shorter than expected");
793 		if (!(s2 = xfgets(file2)))
794 			errx(2, "file2 shorter than expected");
795 
796 		/* If the -l flag was specified, print only left column. */
797 		if (lflag) {
798 			free(s2);
799 			/*
800 			 * XXX - If -l and -I are both specified, all
801 			 * unchanged or ignored lines are shown with a
802 			 * `(' divider.  This matches GNU sdiff, but I
803 			 * believe it is a bug.  Just check out:
804 			 * gsdiff -l -I '^$' samefile samefile.
805 			 */
806 			if (Iflag)
807 				enqueue(s1, '(', NULL);
808 			else
809 				enqueue(s1, ' ', NULL);
810 		} else
811 			enqueue(s1, ' ', s2);
812 	}
813 	/* Ignore deleted lines. */
814 	for (; file1ln < file1start; ++file1ln) {
815 		char *s;
816 
817 		if (!(s = xfgets(file1)))
818 			errx(2, "file1 shorter than expected");
819 
820 		enqueue(s, '(', NULL);
821 	}
822 	/* Ignore added lines. */
823 	for (; file2ln < file2start; ++file2ln) {
824 		char *s;
825 
826 		if (!(s = xfgets(file2)))
827 			errx(2, "file2 shorter than expected");
828 
829 		/* If -l flag was given, don't print right column. */
830 		if (lflag)
831 			free(s);
832 		else
833 			enqueue(NULL, ')', s);
834 	}
835 
836 	/* Process unmodified or skipped lines. */
837 	processq();
838 
839 	switch (cmd) {
840 	case 'a':
841 		printa(file2, file2end);
842 		n = file2end - file2start + 1;
843 		break;
844 	case 'c':
845 		printc(file1, file1end, file2, file2end);
846 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
847 		break;
848 	case 'd':
849 		printd(file1, file1end);
850 		n = file1end - file1start + 1;
851 		break;
852 	default:
853 		errx(2, "invalid diff command: %c: %s", cmd, line);
854 	}
855 	free(line);
856 
857 	/* Skip to next ed line. */
858 	while (n--) {
859 		if (!(line = xfgets(diffpipe)))
860 			errx(2, "diff ended early");
861 		free(line);
862 	}
863 
864 	return (0);
865 }
866 
867 /*
868  * Queues up a diff line.
869  */
870 static void
871 enqueue(char *left, char div, char *right)
872 {
873 	struct diffline *diffp;
874 
875 	if (!(diffp = malloc(sizeof(struct diffline))))
876 		err(2, "enqueue");
877 	diffp->left = left;
878 	diffp->div = div;
879 	diffp->right = right;
880 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
881 }
882 
883 /*
884  * Free a diffline structure and its elements.
885  */
886 static void
887 freediff(struct diffline *diffp)
888 {
889 
890 	free(diffp->left);
891 	free(diffp->right);
892 	free(diffp);
893 }
894 
895 /*
896  * Append second string into first.  Repeated appends to the same string
897  * are cached, making this an O(n) function, where n = strlen(append).
898  */
899 static void
900 astrcat(char **s, const char *append)
901 {
902 	/* Length of string in previous run. */
903 	static size_t offset = 0;
904 	size_t newsiz;
905 	/*
906 	 * String from previous run.  Compared to *s to see if we are
907 	 * dealing with the same string.  If so, we can use offset.
908 	 */
909 	static const char *oldstr = NULL;
910 	char *newstr;
911 
912 	/*
913 	 * First string is NULL, so just copy append.
914 	 */
915 	if (!*s) {
916 		if (!(*s = strdup(append)))
917 			err(2, "astrcat");
918 
919 		/* Keep track of string. */
920 		offset = strlen(*s);
921 		oldstr = *s;
922 
923 		return;
924 	}
925 
926 	/*
927 	 * *s is a string so concatenate.
928 	 */
929 
930 	/* Did we process the same string in the last run? */
931 	/*
932 	 * If this is a different string from the one we just processed
933 	 * cache new string.
934 	 */
935 	if (oldstr != *s) {
936 		offset = strlen(*s);
937 		oldstr = *s;
938 	}
939 
940 	/* Size = strlen(*s) + \n + strlen(append) + '\0'. */
941 	newsiz = offset + 1 + strlen(append) + 1;
942 
943 	/* Resize *s to fit new string. */
944 	newstr = realloc(*s, newsiz);
945 	if (newstr == NULL)
946 		err(2, "astrcat");
947 	*s = newstr;
948 
949 	/* *s + offset should be end of string. */
950 	/* Concatenate. */
951 	strlcpy(*s + offset, "\n", newsiz - offset);
952 	strlcat(*s + offset, append, newsiz - offset);
953 
954 	/* New string length should be exactly newsiz - 1 characters. */
955 	/* Store generated string's values. */
956 	offset = newsiz - 1;
957 	oldstr = *s;
958 }
959 
960 /*
961  * Process diff set queue, printing, prompting, and saving each diff
962  * line stored in queue.
963  */
964 static void
965 processq(void)
966 {
967 	struct diffline *diffp;
968 	char divc, *left, *right;
969 
970 	/* Don't process empty queue. */
971 	if (STAILQ_EMPTY(&diffhead))
972 		return;
973 
974 	/* Remember the divider. */
975 	divc = STAILQ_FIRST(&diffhead)->div;
976 
977 	left = NULL;
978 	right = NULL;
979 	/*
980 	 * Go through set of diffs, concatenating each line in left or
981 	 * right column into two long strings, `left' and `right'.
982 	 */
983 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
984 		/*
985 		 * Print changed lines if -s was given,
986 		 * print all lines if -s was not given.
987 		 */
988 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
989 		    diffp->div == '>')
990 			println(diffp->left, diffp->div, diffp->right);
991 
992 		/* Append new lines to diff set. */
993 		if (diffp->left)
994 			astrcat(&left, diffp->left);
995 		if (diffp->right)
996 			astrcat(&right, diffp->right);
997 	}
998 
999 	/* Empty queue and free each diff line and its elements. */
1000 	while (!STAILQ_EMPTY(&diffhead)) {
1001 		diffp = STAILQ_FIRST(&diffhead);
1002 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1003 		freediff(diffp);
1004 	}
1005 
1006 	/* Write to outfp, prompting user if lines are different. */
1007 	if (outfp)
1008 		switch (divc) {
1009 		case ' ': case '(': case ')':
1010 			fprintf(outfp, "%s\n", left);
1011 			break;
1012 		case '|': case '<': case '>':
1013 			prompt(left, right);
1014 			break;
1015 		default:
1016 			errx(2, "invalid divider: %c", divc);
1017 		}
1018 
1019 	/* Free left and right. */
1020 	free(left);
1021 	free(right);
1022 }
1023 
1024 /*
1025  * Print lines following an (a)ppend command.
1026  */
1027 static void
1028 printa(FILE *file, size_t line2)
1029 {
1030 	char *line;
1031 
1032 	for (; file2ln <= line2; ++file2ln) {
1033 		if (!(line = xfgets(file)))
1034 			errx(2, "append ended early");
1035 		enqueue(NULL, '>', line);
1036 	}
1037 	processq();
1038 }
1039 
1040 /*
1041  * Print lines following a (c)hange command, from file1ln to file1end
1042  * and from file2ln to file2end.
1043  */
1044 static void
1045 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1046 {
1047 	struct fileline {
1048 		STAILQ_ENTRY(fileline)	 fileentries;
1049 		char			*line;
1050 	};
1051 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1052 
1053 	/* Read lines to be deleted. */
1054 	for (; file1ln <= file1end; ++file1ln) {
1055 		struct fileline *linep;
1056 		char *line1;
1057 
1058 		/* Read lines from both. */
1059 		if (!(line1 = xfgets(file1)))
1060 			errx(2, "error reading file1 in delete in change");
1061 
1062 		/* Add to delete queue. */
1063 		if (!(linep = malloc(sizeof(struct fileline))))
1064 			err(2, "printc");
1065 		linep->line = line1;
1066 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1067 	}
1068 
1069 	/* Process changed lines.. */
1070 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1071 	    ++file2ln) {
1072 		struct fileline *del;
1073 		char *add;
1074 
1075 		/* Get add line. */
1076 		if (!(add = xfgets(file2)))
1077 			errx(2, "error reading add in change");
1078 
1079 		del = STAILQ_FIRST(&delqhead);
1080 		enqueue(del->line, '|', add);
1081 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1082 		/*
1083 		 * Free fileline structure but not its elements since
1084 		 * they are queued up.
1085 		 */
1086 		free(del);
1087 	}
1088 	processq();
1089 
1090 	/* Process remaining lines to add. */
1091 	for (; file2ln <= file2end; ++file2ln) {
1092 		char *add;
1093 
1094 		/* Get add line. */
1095 		if (!(add = xfgets(file2)))
1096 			errx(2, "error reading add in change");
1097 
1098 		enqueue(NULL, '>', add);
1099 	}
1100 	processq();
1101 
1102 	/* Process remaining lines to delete. */
1103 	while (!STAILQ_EMPTY(&delqhead)) {
1104 		struct fileline *filep;
1105 
1106 		filep = STAILQ_FIRST(&delqhead);
1107 		enqueue(filep->line, '<', NULL);
1108 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1109 		free(filep);
1110 	}
1111 	processq();
1112 }
1113 
1114 /*
1115  * Print deleted lines from file, from file1ln to file1end.
1116  */
1117 static void
1118 printd(FILE *file1, size_t file1end)
1119 {
1120 	char *line1;
1121 
1122 	/* Print out lines file1ln to line2. */
1123 	for (; file1ln <= file1end; ++file1ln) {
1124 		if (!(line1 = xfgets(file1)))
1125 			errx(2, "file1 ended early in delete");
1126 		enqueue(line1, '<', NULL);
1127 	}
1128 	processq();
1129 }
1130 
/*
 * Print the list of interactive commands to stdout, one per line.
 */
static void
int_usage(void)
{
	static const char *const cmds[] = {
		"e:\tedit blank diff",
		"eb:\tedit both diffs concatenated",
		"el:\tedit left diff",
		"er:\tedit right diff",
		"l | 1:\tchoose left diff",
		"r | 2:\tchoose right diff",
		"s:\tsilent mode--don't print identical lines",
		"v:\tverbose mode--print identical lines",
		"q:\tquit",
	};
	size_t k;

	/* puts() supplies the newline after each entry. */
	for (k = 0; k < sizeof(cmds) / sizeof(cmds[0]); k++)
		puts(cmds[k]);
}
1148 
/*
 * Print the command line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1158