xref: /freebsd/usr.bin/sdiff/sdiff.c (revision d2033021a73db7b8d910c1ffc52f4d1d0def7162)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/param.h>
9 #include <sys/queue.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <sys/wait.h>
13 
14 #include <ctype.h>
15 #include <err.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <getopt.h>
19 #include <limits.h>
20 #include <paths.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "extern.h"
28 
/*
 * Default diff(1) executable.  main() uses this unless --diff-program
 * supplies a different program.
 */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width; main() uses it when -w/--width is absent. */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/*
 * Highest byte index istextfile() inspects when looking for NUL bytes,
 * i.e. it reads at most MAX_CHECK + 1 bytes from each file.
 */
#define MAX_CHECK 768
40 
/*
 * A single diff line: one row of side-by-side output, kept on the
 * diffhead queue until processq() prints and frees it.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;		/* left column text, or NULL; freed by freediff() */
	char	 div;		/* divider character printed between the columns */
	char	*right;		/* right column text, or NULL; freed by freediff() */
};
48 
49 static void astrcat(char **, const char *);
50 static void enqueue(char *, char, char *);
51 static char *mktmpcpy(const char *);
52 static int istextfile(FILE *);
53 static void binexec(char *, char *, char *) __dead2;
54 static void freediff(struct diffline *);
55 static void int_usage(void);
56 static int parsecmd(FILE *, FILE *, FILE *);
57 static void printa(FILE *, size_t);
58 static void printc(FILE *, size_t, FILE *, size_t);
59 static void printcol(const char *, size_t *, const size_t);
60 static void printd(FILE *, size_t);
61 static void println(const char *, const char, const char *);
62 static void processq(void);
63 static void prompt(const char *, const char *);
64 static void usage(void) __dead2;
65 static char *xfgets(FILE *);
66 
/* Queue of pending output rows; filled by enqueue(), drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* next line number to read from file1 and file2 */
static int Iflag = 0;	/* -I was given (ignore sets matching regexp) */
static int	lflag;		/* print only left column for identical lines */
static int	sflag;		/* skip identical lines */
FILE *outfp;		/* -o file to save changes to; NULL when -o was not given */
const char *tmpdir;	/* TMPDIR or /tmp */
76 
/*
 * getopt_long() return values for long options that have no single
 * character form.  Numbering starts above CHAR_MAX so these values cannot
 * collide with an option character.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,	/* --help */
	NORMAL_OPT,			/* not referenced by the longopts table in this file */
	FCASE_SENSITIVE_OPT,		/* --no-ignore-file-name-case */
	FCASE_IGNORE_OPT,		/* --ignore-file-name-case */
	STRIPCR_OPT,			/* --strip-trailing-cr */
	TSIZE_OPT,			/* --tabsize */
	DIFFPROG_OPT,			/* --diff-program */
};
86 
/*
 * Long option table handed to getopt_long() in main().  The last field is
 * the value getopt_long() returns: either the equivalent short option
 * character or one of the *_OPT enum values.
 */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/*
	 * Options processed by diff.
	 * NOTE: main() handles 'l' (--left-column) itself rather than passing
	 * it on, and currently accepts but discards the file-name-case,
	 * strip-trailing-cr and tabsize options.
	 */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	/* sdiff's -W corresponds to diff's -w; main() performs the translation. */
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* End-of-table sentinel required by getopt_long(). */
	{ NULL,				0,			NULL,	'\0'}
};
115 
/*
 * Text printed by --help, one output line per entry, terminated by NULL.
 * Option names here must match the longopts table and the getopt string
 * used in main().
 */
static const char *help_msg[] = {
	"usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-B, --ignore-blank-lines: ignore changes that only add or remove blank lines.",
	"\t-b, --ignore-space-change: ignore changes in the amount of white space.",
	"\t-d, --minimal: minimize diff size.",
	"\t-E, --ignore-tab-expansion: ignore changes due to tab expansion.",
	"\t-H, --speed-large-files: assume large file with scattered changes.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case.",
	"\t--tabsize NUM: change size of tabs (default 8).",

	NULL,
};
139 
140 /*
141  * Create temporary file if source_file is not a regular file.
142  * Returns temporary file name if one was malloced, NULL if unnecessary.
143  */
144 static char *
145 mktmpcpy(const char *source_file)
146 {
147 	struct stat sb;
148 	ssize_t rcount;
149 	int ifd, ofd;
150 	u_char buf[BUFSIZ];
151 	char *target_file;
152 
153 	/* Open input and output. */
154 	ifd = open(source_file, O_RDONLY, 0);
155 	/* File was opened successfully. */
156 	if (ifd != -1) {
157 		if (fstat(ifd, &sb) == -1)
158 			err(2, "error getting file status from %s", source_file);
159 
160 		/* Regular file. */
161 		if (S_ISREG(sb.st_mode)) {
162 			close(ifd);
163 			return (NULL);
164 		}
165 	} else {
166 		/* If ``-'' does not exist the user meant stdin. */
167 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
168 			ifd = STDIN_FILENO;
169 		else
170 			err(2, "error opening %s", source_file);
171 	}
172 
173 	/* Not a regular file, so copy input into temporary file. */
174 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
175 		err(2, "asprintf");
176 	if ((ofd = mkstemp(target_file)) == -1) {
177 		warn("error opening %s", target_file);
178 		goto FAIL;
179 	}
180 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
181 	    rcount != 0) {
182 		ssize_t wcount;
183 
184 		wcount = write(ofd, buf, (size_t)rcount);
185 		if (-1 == wcount || rcount != wcount) {
186 			warn("error writing to %s", target_file);
187 			goto FAIL;
188 		}
189 	}
190 	if (rcount == -1) {
191 		warn("error reading from %s", source_file);
192 		goto FAIL;
193 	}
194 
195 	close(ifd);
196 	close(ofd);
197 
198 	return (target_file);
199 
200 FAIL:
201 	unlink(target_file);
202 	exit(2);
203 }
204 
205 int
206 main(int argc, char **argv)
207 {
208 	FILE *diffpipe=NULL, *file1, *file2;
209 	size_t diffargc = 0, wflag = WIDTH;
210 	int ch, fd[2] = {-1}, status;
211 	pid_t pid=0;
212 	const char *outfile = NULL;
213 	char **diffargv, *diffprog = diff_path, *filename1, *filename2,
214 	     *tmp1, *tmp2, *s1, *s2;
215 	int i;
216 	char I_arg[] = "-I";
217 	char speed_lf[] = "--speed-large-files";
218 
219 	/*
220 	 * Process diff flags.
221 	 */
222 	/*
223 	 * Allocate memory for diff arguments and NULL.
224 	 * Each flag has at most one argument, so doubling argc gives an
225 	 * upper limit of how many diff args can be passed.  argv[0],
226 	 * file1, and file2 won't have arguments so doubling them will
227 	 * waste some memory; however we need an extra space for the
228 	 * NULL at the end, so it sort of works out.
229 	 */
230 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
231 		err(2, "main");
232 
233 	/* Add first argument, the program name. */
234 	diffargv[diffargc++] = diffprog;
235 
236 	/* create a dynamic string for merging single-switch options */
237 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
238 		err(2, "main");
239 
240 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
241 	    longopts, NULL)) != -1) {
242 		const char *errstr;
243 
244 		switch (ch) {
245 		/* only compatible --long-name-form with diff */
246 		case FCASE_IGNORE_OPT:
247 		case FCASE_SENSITIVE_OPT:
248 		case STRIPCR_OPT:
249 		case TSIZE_OPT:
250 		case 'S':
251 		break;
252 		/* combine no-arg single switches */
253 		case 'a':
254 		case 'B':
255 		case 'b':
256 		case 'd':
257 		case 'E':
258 		case 'i':
259 		case 't':
260 		case 'W':
261 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
262 			/*
263 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
264 			 */
265 			if (ch == 'W')
266 				sprintf(diffargv[1], "%sw", diffargv[1]);
267 			else
268 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
269 			break;
270 		case 'H':
271 			diffargv[diffargc++] = speed_lf;
272 			break;
273 		case DIFFPROG_OPT:
274 			diffargv[0] = diffprog = optarg;
275 			break;
276 		case 'I':
277 			Iflag = 1;
278 			diffargv[diffargc++] = I_arg;
279 			diffargv[diffargc++] = optarg;
280 			break;
281 		case 'l':
282 			lflag = 1;
283 			break;
284 		case 'o':
285 			outfile = optarg;
286 			break;
287 		case 's':
288 			sflag = 1;
289 			break;
290 		case 'w':
291 			wflag = strtonum(optarg, WIDTH_MIN,
292 			    INT_MAX, &errstr);
293 			if (errstr)
294 				errx(2, "width is %s: %s", errstr, optarg);
295 			break;
296 		case HELP_OPT:
297 			for (i = 0; help_msg[i] != NULL; i++)
298 				printf("%s\n", help_msg[i]);
299 			exit(0);
300 			break;
301 		default:
302 			usage();
303 			break;
304 		}
305 	}
306 
307 	/* no single switches were used */
308 	if (strcmp(diffargv[1], "-") == 0 ) {
309 		for ( i = 1; i < argc-1; i++) {
310 			diffargv[i] = diffargv[i+1];
311 		}
312 		diffargv[diffargc-1] = NULL;
313 		diffargc--;
314 	}
315 
316 	argc -= optind;
317 	argv += optind;
318 
319 	if (argc != 2)
320 		usage();
321 
322 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
323 		err(2, "could not open: %s", optarg);
324 
325 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
326 		tmpdir = _PATH_TMP;
327 
328 	filename1 = argv[0];
329 	filename2 = argv[1];
330 
331 	/*
332 	 * Create temporary files for diff and sdiff to share if file1
333 	 * or file2 are not regular files.  This allows sdiff and diff
334 	 * to read the same inputs if one or both inputs are stdin.
335 	 *
336 	 * If any temporary files were created, their names would be
337 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
338 	 */
339 	tmp1 = tmp2 = NULL;
340 	/* file1 and file2 are the same, so copy to same temp file. */
341 	if (strcmp(filename1, filename2) == 0) {
342 		if ((tmp1 = mktmpcpy(filename1)))
343 			filename1 = filename2 = tmp1;
344 	/* Copy file1 and file2 into separate temp files. */
345 	} else {
346 		if ((tmp1 = mktmpcpy(filename1)))
347 			filename1 = tmp1;
348 		if ((tmp2 = mktmpcpy(filename2)))
349 			filename2 = tmp2;
350 	}
351 
352 	diffargv[diffargc++] = filename1;
353 	diffargv[diffargc++] = filename2;
354 	/* Add NULL to end of array to indicate end of array. */
355 	diffargv[diffargc++] = NULL;
356 
357 	/* Subtract column divider and divide by two. */
358 	width = (wflag - 3) / 2;
359 	/* Make sure line_width can fit in size_t. */
360 	if (width > (SIZE_MAX - 3) / 2)
361 		errx(2, "width is too large: %zu", width);
362 	line_width = width * 2 + 3;
363 
364 	if (pipe(fd))
365 		err(2, "pipe");
366 
367 	switch (pid = fork()) {
368 	case 0:
369 		/* child */
370 		/* We don't read from the pipe. */
371 		close(fd[0]);
372 		if (dup2(fd[1], STDOUT_FILENO) == -1)
373 			err(2, "child could not duplicate descriptor");
374 		/* Free unused descriptor. */
375 		close(fd[1]);
376 		execvp(diffprog, diffargv);
377 		err(2, "could not execute diff: %s", diffprog);
378 		break;
379 	case -1:
380 		err(2, "could not fork");
381 		break;
382 	}
383 
384 	/* parent */
385 	/* We don't write to the pipe. */
386 	close(fd[1]);
387 
388 	/* Open pipe to diff command. */
389 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
390 		err(2, "could not open diff pipe");
391 
392 	if ((file1 = fopen(filename1, "r")) == NULL)
393 		err(2, "could not open %s", filename1);
394 	if ((file2 = fopen(filename2, "r")) == NULL)
395 		err(2, "could not open %s", filename2);
396 	if (!istextfile(file1) || !istextfile(file2)) {
397 		/* Close open files and pipe, delete temps */
398 		fclose(file1);
399 		fclose(file2);
400 		if (diffpipe != NULL)
401 			fclose(diffpipe);
402 		if (tmp1)
403 			if (unlink(tmp1))
404 				warn("Error deleting %s.", tmp1);
405 		if (tmp2)
406 			if (unlink(tmp2))
407 				warn("Error deleting %s.", tmp2);
408 		free(tmp1);
409 		free(tmp2);
410 		binexec(diffprog, filename1, filename2);
411 	}
412 	/* Line numbers start at one. */
413 	file1ln = file2ln = 1;
414 
415 	/* Read and parse diff output. */
416 	while (parsecmd(diffpipe, file1, file2) != EOF)
417 		;
418 	fclose(diffpipe);
419 
420 	/* Wait for diff to exit. */
421 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
422 	    WEXITSTATUS(status) >= 2)
423 		err(2, "diff exited abnormally.");
424 
425 	/* Delete and free unneeded temporary files. */
426 	if (tmp1)
427 		if (unlink(tmp1))
428 			warn("Error deleting %s.", tmp1);
429 	if (tmp2)
430 		if (unlink(tmp2))
431 			warn("Error deleting %s.", tmp2);
432 	free(tmp1);
433 	free(tmp2);
434 	filename1 = filename2 = tmp1 = tmp2 = NULL;
435 
436 	/* No more diffs, so print common lines. */
437 	if (lflag)
438 		while ((s1 = xfgets(file1)))
439 			enqueue(s1, ' ', NULL);
440 	else
441 		for (;;) {
442 			s1 = xfgets(file1);
443 			s2 = xfgets(file2);
444 			if (s1 || s2)
445 				enqueue(s1, ' ', s2);
446 			else
447 				break;
448 		}
449 	fclose(file1);
450 	fclose(file2);
451 	/* Process unmodified lines. */
452 	processq();
453 
454 	/* Return diff exit status. */
455 	return (WEXITSTATUS(status));
456 }
457 
/*
 * When sdiff detects a binary file as input, replaces the current
 * process with "diffprog f1 f2" to maintain the same behavior as GNU
 * sdiff with binary input.
 *
 * diffprog is looked up with execvp(), the same way main() starts diff,
 * so a --diff-program given without a path works here too.  Never
 * returns: if the exec fails, a diagnostic including the reason is
 * printed and the process exits with status 2.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = {diffprog, f1, f2, NULL};

	execvp(diffprog, args);

	/* Only reached when execvp() failed. */
	err(2, "could not execute diff process");
}
472 
473 /*
474  * Checks whether a file appears to be a text file.
475  */
476 static int
477 istextfile(FILE *f)
478 {
479 	int	ch, i;
480 
481 	if (f == NULL)
482 		return (1);
483 	rewind(f);
484 	for (i = 0; i <= MAX_CHECK; i++) {
485 		ch = fgetc(f);
486 		if (ch == '\0') {
487 			rewind(f);
488 			return (0);
489 		}
490 		if (ch == EOF)
491 			break;
492 	}
493 	rewind(f);
494 	return (1);
495 }
496 
/*
 * Write one column (left or right) of a row to stdout.
 *
 * s is the text, *col the screen column the cursor is currently at and
 * col_max the first column that must not be written to.  Ordinary
 * characters advance *col by one; a tab advances it to the next multiple
 * of eight.  Output stops at the end of s, when *col reaches col_max, or
 * at a tab that would land beyond col_max (that tab is not written).
 * *col is left at the column reached.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;

	for (cp = s; *cp != '\0' && *col < col_max; cp++) {
		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up would overflow size_t: give up. */
			if (*col > SIZE_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit in this column: drop it. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
536 
537 /*
538  * Prompts user to either choose between two strings or edit one, both,
539  * or neither.
540  */
541 static void
542 prompt(const char *s1, const char *s2)
543 {
544 	char *cmd;
545 
546 	/* Print command prompt. */
547 	putchar('%');
548 
549 	/* Get user input. */
550 	for (; (cmd = xfgets(stdin)); free(cmd)) {
551 		const char *p;
552 
553 		/* Skip leading whitespace. */
554 		for (p = cmd; isspace(*p); ++p)
555 			;
556 		switch (*p) {
557 		case 'e':
558 			/* Skip `e'. */
559 			++p;
560 			if (eparse(p, s1, s2) == -1)
561 				goto USAGE;
562 			break;
563 		case 'l':
564 		case '1':
565 			/* Choose left column as-is. */
566 			if (s1 != NULL)
567 				fprintf(outfp, "%s\n", s1);
568 			/* End of command parsing. */
569 			break;
570 		case 'q':
571 			goto QUIT;
572 		case 'r':
573 		case '2':
574 			/* Choose right column as-is. */
575 			if (s2 != NULL)
576 				fprintf(outfp, "%s\n", s2);
577 			/* End of command parsing. */
578 			break;
579 		case 's':
580 			sflag = 1;
581 			goto PROMPT;
582 		case 'v':
583 			sflag = 0;
584 			/* FALLTHROUGH */
585 		default:
586 			/* Interactive usage help. */
587 USAGE:
588 			int_usage();
589 PROMPT:
590 			putchar('%');
591 
592 			/* Prompt user again. */
593 			continue;
594 		}
595 		free(cmd);
596 		return;
597 	}
598 
599 	/*
600 	 * If there was no error, we received an EOF from stdin, so we
601 	 * should quit.
602 	 */
603 QUIT:
604 	fclose(outfp);
605 	exit(0);
606 }
607 
608 /*
609  * Takes two strings, separated by a column divider.  NULL strings are
610  * treated as empty columns.  If the divider is the ` ' character, the
611  * second column is not printed (-l flag).  In this case, the second
612  * string must be NULL.  When the second column is NULL, the divider
613  * does not print the trailing space following the divider character.
614  *
615  * Takes into account that tabs can take multiple columns.
616  */
617 static void
618 println(const char *s1, const char divider, const char *s2)
619 {
620 	size_t col;
621 
622 	/* Print first column.  Skips if s1 == NULL. */
623 	col = 0;
624 	if (s1) {
625 		/* Skip angle bracket and space. */
626 		printcol(s1, &col, width);
627 
628 	}
629 
630 	/* Otherwise, we pad this column up to width. */
631 	for (; col < width; ++col)
632 		putchar(' ');
633 
634 	/* Only print left column. */
635 	if (divider == ' ' && !s2) {
636 		printf(" (\n");
637 		return;
638 	}
639 
640 	/*
641 	 * Print column divider.  If there is no second column, we don't
642 	 * need to add the space for padding.
643 	 */
644 	if (!s2) {
645 		printf(" %c\n", divider);
646 		return;
647 	}
648 	printf(" %c ", divider);
649 	col += 3;
650 
651 	/* Skip angle bracket and space. */
652 	printcol(s2, &col, line_width);
653 
654 	putchar('\n');
655 }
656 
/*
 * Reads a line from file and returns it as a string with the trailing
 * newline, if any, removed.  If EOF is reached, NULL is returned.  The
 * returned string must be freed afterwards.
 *
 * Any error indicator on file is cleared before reading.  A read error
 * is fatal (exit status 2).
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t l;
	char *s = NULL;

	clearerr(file);

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may have allocated a buffer even though it failed. */
		free(s);
		return (NULL);
	}

	if (l > 0 && s[l - 1] == '\n')
		s[l - 1] = '\0';

	return (s);
}
683 
684 /*
685  * Parse ed commands from diffpipe and print lines from file1 (lines
686  * to change or delete) or file2 (lines to add or change).
687  * Returns EOF or 0.
688  */
689 static int
690 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
691 {
692 	size_t file1start, file1end, file2start, file2end, n;
693 	/* ed command line and pointer to characters in line */
694 	char *line, *p, *q;
695 	const char *errstr;
696 	char c, cmd;
697 
698 	/* Read ed command. */
699 	if (!(line = xfgets(diffpipe)))
700 		return (EOF);
701 
702 	p = line;
703 	/* Go to character after line number. */
704 	while (isdigit(*p))
705 		++p;
706 	c = *p;
707 	*p++ = 0;
708 	file1start = strtonum(line, 0, INT_MAX, &errstr);
709 	if (errstr)
710 		errx(2, "file1 start is %s: %s", errstr, line);
711 
712 	/* A range is specified for file1. */
713 	if (c == ',') {
714 		q = p;
715 		/* Go to character after file2end. */
716 		while (isdigit(*p))
717 			++p;
718 		c = *p;
719 		*p++ = 0;
720 		file1end = strtonum(q, 0, INT_MAX, &errstr);
721 		if (errstr)
722 			errx(2, "file1 end is %s: %s", errstr, line);
723 		if (file1start > file1end)
724 			errx(2, "invalid line range in file1: %s", line);
725 	} else
726 		file1end = file1start;
727 
728 	cmd = c;
729 	/* Check that cmd is valid. */
730 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
731 		errx(2, "ed command not recognized: %c: %s", cmd, line);
732 
733 	q = p;
734 	/* Go to character after line number. */
735 	while (isdigit(*p))
736 		++p;
737 	c = *p;
738 	*p++ = 0;
739 	file2start = strtonum(q, 0, INT_MAX, &errstr);
740 	if (errstr)
741 		errx(2, "file2 start is %s: %s", errstr, line);
742 
743 	/*
744 	 * There should either be a comma signifying a second line
745 	 * number or the line should just end here.
746 	 */
747 	if (c != ',' && c != '\0')
748 		errx(2, "invalid line range in file2: %c: %s", c, line);
749 
750 	if (c == ',') {
751 
752 		file2end = strtonum(p, 0, INT_MAX, &errstr);
753 		if (errstr)
754 			errx(2, "file2 end is %s: %s", errstr, line);
755 		if (file2start >= file2end)
756 			errx(2, "invalid line range in file2: %s", line);
757 	} else
758 		file2end = file2start;
759 
760 	/* Appends happen _after_ stated line. */
761 	if (cmd == 'a') {
762 		if (file1start != file1end)
763 			errx(2, "append cannot have a file1 range: %s",
764 			    line);
765 		if (file1start == SIZE_MAX)
766 			errx(2, "file1 line range too high: %s", line);
767 		file1start = ++file1end;
768 	}
769 	/*
770 	 * I'm not sure what the deal is with the line numbers for
771 	 * deletes, though.
772 	 */
773 	else if (cmd == 'd') {
774 		if (file2start != file2end)
775 			errx(2, "delete cannot have a file2 range: %s",
776 			    line);
777 		if (file2start == SIZE_MAX)
778 			errx(2, "file2 line range too high: %s", line);
779 		file2start = ++file2end;
780 	}
781 
782 	/*
783 	 * Continue reading file1 and file2 until we reach line numbers
784 	 * specified by diff.  Should only happen with -I flag.
785 	 */
786 	for (; file1ln < file1start && file2ln < file2start;
787 	    ++file1ln, ++file2ln) {
788 		char *s1, *s2;
789 
790 		if (!(s1 = xfgets(file1)))
791 			errx(2, "file1 shorter than expected");
792 		if (!(s2 = xfgets(file2)))
793 			errx(2, "file2 shorter than expected");
794 
795 		/* If the -l flag was specified, print only left column. */
796 		if (lflag) {
797 			free(s2);
798 			/*
799 			 * XXX - If -l and -I are both specified, all
800 			 * unchanged or ignored lines are shown with a
801 			 * `(' divider.  This matches GNU sdiff, but I
802 			 * believe it is a bug.  Just check out:
803 			 * gsdiff -l -I '^$' samefile samefile.
804 			 */
805 			if (Iflag)
806 				enqueue(s1, '(', NULL);
807 			else
808 				enqueue(s1, ' ', NULL);
809 		} else
810 			enqueue(s1, ' ', s2);
811 	}
812 	/* Ignore deleted lines. */
813 	for (; file1ln < file1start; ++file1ln) {
814 		char *s;
815 
816 		if (!(s = xfgets(file1)))
817 			errx(2, "file1 shorter than expected");
818 
819 		enqueue(s, '(', NULL);
820 	}
821 	/* Ignore added lines. */
822 	for (; file2ln < file2start; ++file2ln) {
823 		char *s;
824 
825 		if (!(s = xfgets(file2)))
826 			errx(2, "file2 shorter than expected");
827 
828 		/* If -l flag was given, don't print right column. */
829 		if (lflag)
830 			free(s);
831 		else
832 			enqueue(NULL, ')', s);
833 	}
834 
835 	/* Process unmodified or skipped lines. */
836 	processq();
837 
838 	switch (cmd) {
839 	case 'a':
840 		printa(file2, file2end);
841 		n = file2end - file2start + 1;
842 		break;
843 	case 'c':
844 		printc(file1, file1end, file2, file2end);
845 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
846 		break;
847 	case 'd':
848 		printd(file1, file1end);
849 		n = file1end - file1start + 1;
850 		break;
851 	default:
852 		errx(2, "invalid diff command: %c: %s", cmd, line);
853 	}
854 	free(line);
855 
856 	/* Skip to next ed line. */
857 	while (n--) {
858 		if (!(line = xfgets(diffpipe)))
859 			errx(2, "diff ended early");
860 		free(line);
861 	}
862 
863 	return (0);
864 }
865 
866 /*
867  * Queues up a diff line.
868  */
869 static void
870 enqueue(char *left, char divider, char *right)
871 {
872 	struct diffline *diffp;
873 
874 	if (!(diffp = malloc(sizeof(struct diffline))))
875 		err(2, "enqueue");
876 	diffp->left = left;
877 	diffp->div = divider;
878 	diffp->right = right;
879 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
880 }
881 
882 /*
883  * Free a diffline structure and its elements.
884  */
885 static void
886 freediff(struct diffline *diffp)
887 {
888 
889 	free(diffp->left);
890 	free(diffp->right);
891 	free(diffp);
892 }
893 
/*
 * Append `append' to the heap string *s, separated by a newline.
 *
 * If *s is NULL it simply becomes a copy of append.  Otherwise *s is
 * grown with realloc() and becomes "<old>\n<append>".  The pointer and
 * length of the most recently handled string are remembered between
 * calls, so repeatedly appending to the same string does not rescan it.
 * Allocation failure is fatal (exit status 2).
 */
static void
astrcat(char **s, const char *append)
{
	/* String handled by the previous call, and its length. */
	static const char *oldstr = NULL;
	static size_t offset = 0;
	size_t addlen, newsiz;
	char *grown;

	/* Nothing to append to yet: start the string. */
	if (*s == NULL) {
		if ((*s = strdup(append)) == NULL)
			err(2, "astrcat");
		oldstr = *s;
		offset = strlen(*s);
		return;
	}

	/* Not the string we saw last time: measure it afresh. */
	if (oldstr != *s)
		offset = strlen(*s);

	/* Old text + '\n' + appended text + '\0'. */
	addlen = strlen(append);
	newsiz = offset + 1 + addlen + 1;

	grown = realloc(*s, newsiz);
	if (grown == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text (with its NUL) at the old end. */
	grown[offset] = '\n';
	memcpy(grown + offset + 1, append, addlen + 1);

	/* Remember the result for the next call. */
	*s = grown;
	oldstr = grown;
	offset = newsiz - 1;
}
958 
959 /*
960  * Process diff set queue, printing, prompting, and saving each diff
961  * line stored in queue.
962  */
963 static void
964 processq(void)
965 {
966 	struct diffline *diffp;
967 	char divc, *left, *right;
968 
969 	/* Don't process empty queue. */
970 	if (STAILQ_EMPTY(&diffhead))
971 		return;
972 
973 	/* Remember the divider. */
974 	divc = STAILQ_FIRST(&diffhead)->div;
975 
976 	left = NULL;
977 	right = NULL;
978 	/*
979 	 * Go through set of diffs, concatenating each line in left or
980 	 * right column into two long strings, `left' and `right'.
981 	 */
982 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
983 		/*
984 		 * Print changed lines if -s was given,
985 		 * print all lines if -s was not given.
986 		 */
987 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
988 		    diffp->div == '>')
989 			println(diffp->left, diffp->div, diffp->right);
990 
991 		/* Append new lines to diff set. */
992 		if (diffp->left)
993 			astrcat(&left, diffp->left);
994 		if (diffp->right)
995 			astrcat(&right, diffp->right);
996 	}
997 
998 	/* Empty queue and free each diff line and its elements. */
999 	while (!STAILQ_EMPTY(&diffhead)) {
1000 		diffp = STAILQ_FIRST(&diffhead);
1001 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1002 		freediff(diffp);
1003 	}
1004 
1005 	/* Write to outfp, prompting user if lines are different. */
1006 	if (outfp)
1007 		switch (divc) {
1008 		case ' ': case '(': case ')':
1009 			fprintf(outfp, "%s\n", left);
1010 			break;
1011 		case '|': case '<': case '>':
1012 			prompt(left, right);
1013 			break;
1014 		default:
1015 			errx(2, "invalid divider: %c", divc);
1016 		}
1017 
1018 	/* Free left and right. */
1019 	free(left);
1020 	free(right);
1021 }
1022 
1023 /*
1024  * Print lines following an (a)ppend command.
1025  */
1026 static void
1027 printa(FILE *file, size_t line2)
1028 {
1029 	char *line;
1030 
1031 	for (; file2ln <= line2; ++file2ln) {
1032 		if (!(line = xfgets(file)))
1033 			errx(2, "append ended early");
1034 		enqueue(NULL, '>', line);
1035 	}
1036 	processq();
1037 }
1038 
1039 /*
1040  * Print lines following a (c)hange command, from file1ln to file1end
1041  * and from file2ln to file2end.
1042  */
1043 static void
1044 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1045 {
1046 	struct fileline {
1047 		STAILQ_ENTRY(fileline)	 fileentries;
1048 		char			*line;
1049 	};
1050 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1051 
1052 	/* Read lines to be deleted. */
1053 	for (; file1ln <= file1end; ++file1ln) {
1054 		struct fileline *linep;
1055 		char *line1;
1056 
1057 		/* Read lines from both. */
1058 		if (!(line1 = xfgets(file1)))
1059 			errx(2, "error reading file1 in delete in change");
1060 
1061 		/* Add to delete queue. */
1062 		if (!(linep = malloc(sizeof(struct fileline))))
1063 			err(2, "printc");
1064 		linep->line = line1;
1065 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1066 	}
1067 
1068 	/* Process changed lines.. */
1069 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1070 	    ++file2ln) {
1071 		struct fileline *del;
1072 		char *add;
1073 
1074 		/* Get add line. */
1075 		if (!(add = xfgets(file2)))
1076 			errx(2, "error reading add in change");
1077 
1078 		del = STAILQ_FIRST(&delqhead);
1079 		enqueue(del->line, '|', add);
1080 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1081 		/*
1082 		 * Free fileline structure but not its elements since
1083 		 * they are queued up.
1084 		 */
1085 		free(del);
1086 	}
1087 	processq();
1088 
1089 	/* Process remaining lines to add. */
1090 	for (; file2ln <= file2end; ++file2ln) {
1091 		char *add;
1092 
1093 		/* Get add line. */
1094 		if (!(add = xfgets(file2)))
1095 			errx(2, "error reading add in change");
1096 
1097 		enqueue(NULL, '>', add);
1098 	}
1099 	processq();
1100 
1101 	/* Process remaining lines to delete. */
1102 	while (!STAILQ_EMPTY(&delqhead)) {
1103 		struct fileline *filep;
1104 
1105 		filep = STAILQ_FIRST(&delqhead);
1106 		enqueue(filep->line, '<', NULL);
1107 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1108 		free(filep);
1109 	}
1110 	processq();
1111 }
1112 
1113 /*
1114  * Print deleted lines from file, from file1ln to file1end.
1115  */
1116 static void
1117 printd(FILE *file1, size_t file1end)
1118 {
1119 	char *line1;
1120 
1121 	/* Print out lines file1ln to line2. */
1122 	for (; file1ln <= file1end; ++file1ln) {
1123 		if (!(line1 = xfgets(file1)))
1124 			errx(2, "file1 ended early in delete");
1125 		enqueue(line1, '<', NULL);
1126 	}
1127 	processq();
1128 }
1129 
/*
 * Print the list of commands understood at the interactive merge prompt.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	puts(commands);
}
1147 
/*
 * Print the command synopsis to stderr and exit with status 2.
 * The flag list mirrors the option string main() passes to
 * getopt_long().
 */
static void
usage(void)
{

	fprintf(stderr,
	    "usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n");
	exit(2);
}
1157