xref: /freebsd/usr.bin/sdiff/sdiff.c (revision a58ece87303f882367105c92a27268ed6befa655)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/param.h>
9 #include <sys/queue.h>
10 #include <sys/stat.h>
11 #include <sys/wait.h>
12 
13 #include <ctype.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <getopt.h>
18 #include <limits.h>
19 #include <paths.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "extern.h"
28 
/* Default diff(1) executable; main() uses it unless --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width; main() starts from it and -w overrides it. */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/*
 * Bound on how far istextfile() scans for a NUL byte: its loop index
 * runs from 0 through MAX_CHECK inclusive.
 */
#define MAX_CHECK 768
40 
/*
 * A single diff line.  Entries are created by enqueue(), linked on the
 * diffhead queue and released (including both strings) by freediff().
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;
	char	*left;
	char	 div;
	char	*right;
};
48 
/* Forward declarations of the file-local helpers defined below. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static int istextfile(FILE *);
static int bindiff(FILE *, char *, FILE *, char *);
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
static void usage(void) __dead2;
static char *xfgets(FILE *);
66 
/* Queue of pending output lines: filled by enqueue(), drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* line number of file1 and file2 */
static bool Iflag;		/* ignore sets matching regexp */
static bool lflag;		/* print only left column for identical lines */
static bool sflag;		/* skip identical lines */
static bool tflag;		/* expand tabs */
static int tabsize = 8;		/* tab size */
/*
 * The two globals below are not static.
 * NOTE(review): presumably they are shared with other translation
 * units through extern.h -- confirm.
 */
FILE *outfp;			/* file to save changes to */
const char *tmpdir;		/* TMPDIR or /tmp */
78 
/*
 * getopt_long() return codes for options that have no single-character
 * form.  Numbering starts at CHAR_MAX + 1 so that none of them can
 * equal a character option code.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};
88 
/*
 * Long option table passed to getopt_long() in main().  Each entry maps
 * a long name to the code main()'s option switch receives: either a
 * single-character option or one of the *_OPT enum values.
 */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/* Options processed by diff. */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* All-zero entry terminates the table. */
	{ NULL,				0,			NULL,	'\0'}
};
117 
/*
 * Text printed by --help: one array element per output line (main()
 * appends the newline), terminated by a NULL entry.
 *
 * The long names listed here must match the longopts table; -b is
 * spelled --ignore-space-change there.
 */
static const char *help_msg[] = {
	"usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-B, --ignore-blank-lines: ignore changes whose lines are all blank.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-E, --ignore-tab-expansion: ignore changes due to tab expansion.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all whitespace.",
	"\t-H, --speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
141 
142 /*
143  * Create temporary file if source_file is not a regular file.
144  * Returns temporary file name if one was malloced, NULL if unnecessary.
145  */
146 static char *
147 mktmpcpy(const char *source_file)
148 {
149 	struct stat sb;
150 	ssize_t rcount;
151 	int ifd, ofd;
152 	u_char buf[BUFSIZ];
153 	char *target_file;
154 
155 	/* Open input and output. */
156 	ifd = open(source_file, O_RDONLY, 0);
157 	/* File was opened successfully. */
158 	if (ifd != -1) {
159 		if (fstat(ifd, &sb) == -1)
160 			err(2, "error getting file status from %s", source_file);
161 
162 		/* Regular file. */
163 		if (S_ISREG(sb.st_mode)) {
164 			close(ifd);
165 			return (NULL);
166 		}
167 	} else {
168 		/* If ``-'' does not exist the user meant stdin. */
169 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
170 			ifd = STDIN_FILENO;
171 		else
172 			err(2, "error opening %s", source_file);
173 	}
174 
175 	/* Not a regular file, so copy input into temporary file. */
176 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
177 		err(2, "asprintf");
178 	if ((ofd = mkstemp(target_file)) == -1) {
179 		warn("error opening %s", target_file);
180 		goto FAIL;
181 	}
182 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
183 	    rcount != 0) {
184 		ssize_t wcount;
185 
186 		wcount = write(ofd, buf, (size_t)rcount);
187 		if (-1 == wcount || rcount != wcount) {
188 			warn("error writing to %s", target_file);
189 			goto FAIL;
190 		}
191 	}
192 	if (rcount == -1) {
193 		warn("error reading from %s", source_file);
194 		goto FAIL;
195 	}
196 
197 	close(ifd);
198 	close(ofd);
199 
200 	return (target_file);
201 
202 FAIL:
203 	unlink(target_file);
204 	exit(2);
205 }
206 
207 int
208 main(int argc, char **argv)
209 {
210 	FILE *diffpipe, *file1, *file2;
211 	size_t diffargc = 0, flagc = 0, wval = WIDTH;
212 	int ch, fd[2], i, ret, status;
213 	pid_t pid;
214 	const char *errstr, *outfile = NULL;
215 	char **diffargv, *diffprog = diff_path, *flagv;
216 	char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
217 	char I_arg[] = "-I";
218 	char speed_lf[] = "--speed-large-files";
219 
220 	/*
221 	 * Process diff flags.
222 	 */
223 	/*
224 	 * Allocate memory for diff arguments and NULL.
225 	 * Each flag has at most one argument, so doubling argc gives an
226 	 * upper limit of how many diff args can be passed.  argv[0],
227 	 * file1, and file2 won't have arguments so doubling them will
228 	 * waste some memory; however we need an extra space for the
229 	 * NULL at the end, so it sort of works out.
230 	 */
231 	if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
232 		err(2, NULL);
233 
234 	/* Add first argument, the program name. */
235 	diffargv[diffargc++] = diffprog;
236 
237 	/* create a dynamic string for merging single-character options */
238 	if ((flagv = malloc(flagc + 2)) == NULL)
239 		err(2, NULL);
240 	flagv[flagc] = '-';
241 	flagv[flagc + 1] = '\0';
242 	diffargv[diffargc++] = flagv;
243 
244 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
245 	    longopts, NULL)) != -1) {
246 		switch (ch) {
247 		/* only compatible --long-name-form with diff */
248 		case FCASE_IGNORE_OPT:
249 		case FCASE_SENSITIVE_OPT:
250 		case STRIPCR_OPT:
251 		case 'S':
252 		break;
253 		/* combine no-arg single switches */
254 		case 'a':
255 		case 'B':
256 		case 'b':
257 		case 'd':
258 		case 'E':
259 		case 'i':
260 		case 'W':
261 			flagc++;
262 			flagv = realloc(flagv, flagc + 2);
263 			/*
264 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
265 			 */
266 			flagv[flagc] = ch == 'W' ? 'w' : ch;
267 			flagv[flagc + 1] = '\0';
268 			break;
269 		case 'H':
270 			diffargv[diffargc++] = speed_lf;
271 			break;
272 		case DIFFPROG_OPT:
273 			diffargv[0] = diffprog = optarg;
274 			break;
275 		case 'I':
276 			Iflag = true;
277 			diffargv[diffargc++] = I_arg;
278 			diffargv[diffargc++] = optarg;
279 			break;
280 		case 'l':
281 			lflag = true;
282 			break;
283 		case 'o':
284 			outfile = optarg;
285 			break;
286 		case 's':
287 			sflag = true;
288 			break;
289 		case 't':
290 			tflag = true;
291 			break;
292 		case 'w':
293 			wval = strtonum(optarg, WIDTH_MIN,
294 			    INT_MAX, &errstr);
295 			if (errstr)
296 				errx(2, "width is %s: %s", errstr, optarg);
297 			break;
298 		case HELP_OPT:
299 			for (i = 0; help_msg[i] != NULL; i++)
300 				printf("%s\n", help_msg[i]);
301 			exit(0);
302 			break;
303 		case TSIZE_OPT:
304 			tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
305 			if (errstr)
306 				errx(2, "tabsize is %s: %s", errstr, optarg);
307 			break;
308 		default:
309 			usage();
310 			break;
311 		}
312 	}
313 
314 	/* no single-character options were used */
315 	if (flagc == 0) {
316 		memmove(diffargv + 1, diffargv + 2,
317 		    sizeof(char *) * (diffargc - 2));
318 		diffargc--;
319 		free(flagv);
320 	}
321 
322 	argc -= optind;
323 	argv += optind;
324 
325 	if (argc != 2)
326 		usage();
327 
328 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
329 		err(2, "could not open: %s", optarg);
330 
331 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
332 		tmpdir = _PATH_TMP;
333 
334 	filename1 = argv[0];
335 	filename2 = argv[1];
336 
337 	/*
338 	 * Create temporary files for diff and sdiff to share if file1
339 	 * or file2 are not regular files.  This allows sdiff and diff
340 	 * to read the same inputs if one or both inputs are stdin.
341 	 *
342 	 * If any temporary files were created, their names would be
343 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
344 	 */
345 	tmp1 = tmp2 = NULL;
346 	/* file1 and file2 are the same, so copy to same temp file. */
347 	if (strcmp(filename1, filename2) == 0) {
348 		if ((tmp1 = mktmpcpy(filename1)))
349 			filename1 = filename2 = tmp1;
350 	/* Copy file1 and file2 into separate temp files. */
351 	} else {
352 		if ((tmp1 = mktmpcpy(filename1)))
353 			filename1 = tmp1;
354 		if ((tmp2 = mktmpcpy(filename2)))
355 			filename2 = tmp2;
356 	}
357 
358 	if ((file1 = fopen(filename1, "r")) == NULL)
359 		err(2, "could not open %s", filename1);
360 	if ((file2 = fopen(filename2, "r")) == NULL)
361 		err(2, "could not open %s", filename2);
362 	if (!istextfile(file1) || !istextfile(file2)) {
363 		ret = bindiff(file1, filename1, file2, filename2);
364 		goto done;
365 	}
366 
367 	diffargv[diffargc++] = filename1;
368 	diffargv[diffargc++] = filename2;
369 	/* Add NULL to end of array to indicate end of array. */
370 	diffargv[diffargc++] = NULL;
371 
372 	/* Subtract column divider and divide by two. */
373 	width = (wval - 3) / 2;
374 	/* Make sure line_width can fit in size_t. */
375 	if (width > (SIZE_MAX - 3) / 2)
376 		errx(2, "width is too large: %zu", width);
377 	line_width = width * 2 + 3;
378 
379 	if (pipe(fd))
380 		err(2, "pipe");
381 
382 	if ((pid = fork()) < 0)
383 		err(1, "fork()");
384 	if (pid == 0) {
385 		/* child */
386 		/* We don't read from the pipe. */
387 		close(fd[0]);
388 		if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
389 			_exit(2);
390 		/* Free unused descriptor. */
391 		close(fd[1]);
392 		execvp(diffprog, diffargv);
393 		_exit(2);
394 	}
395 
396 	/* parent */
397 	/* We don't write to the pipe. */
398 	close(fd[1]);
399 
400 	/* Open pipe to diff command. */
401 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
402 		err(2, "could not open diff pipe");
403 
404 	/* Line numbers start at one. */
405 	file1ln = file2ln = 1;
406 
407 	/* Read and parse diff output. */
408 	while (parsecmd(diffpipe, file1, file2) != EOF)
409 		;
410 	fclose(diffpipe);
411 
412 	/* Wait for diff to exit. */
413 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
414 	    WEXITSTATUS(status) >= 2)
415 		errx(2, "diff exited abnormally");
416 	ret = WEXITSTATUS(status);
417 
418 	/* No more diffs, so enqueue common lines. */
419 	if (lflag)
420 		while ((s1 = xfgets(file1)))
421 			enqueue(s1, ' ', NULL);
422 	else
423 		for (;;) {
424 			s1 = xfgets(file1);
425 			s2 = xfgets(file2);
426 			if (s1 || s2)
427 				enqueue(s1, ' ', s2);
428 			else
429 				break;
430 		}
431 	fclose(file1);
432 	fclose(file2);
433 	/* Process unmodified lines. */
434 	processq();
435 
436 done:
437 	/* Delete and free unneeded temporary files. */
438 	if (tmp1 != NULL) {
439 		if (unlink(tmp1) != 0)
440 			warn("failed to delete %s", tmp1);
441 		free(tmp1);
442 	}
443 	if (tmp2 != NULL) {
444 		if (unlink(tmp2) != 0)
445 			warn("failed to delete %s", tmp2);
446 		free(tmp2);
447 	}
448 
449 	/* Return diff exit status. */
450 	free(diffargv);
451 	if (flagc > 0)
452 		free(flagv);
453 	return (ret);
454 }
455 
/*
 * Byte-wise comparison used when an input was classified as binary.
 *
 * Reads f1 and f2 in lockstep from their current positions until the
 * bytes differ or either stream reaches EOF.  fn1 and fn2 are the
 * names used in messages.
 *
 * Returns 0 if both streams ended together without a mismatch, 1 after
 * printing "Binary files ... differ", and 2 (after a warning) if a
 * read error is pending on either stream.
 */
static int
bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
{
	int b1, b2;

	/* Take both stream locks once instead of once per byte. */
	flockfile(f1);
	flockfile(f2);
	for (;;) {
		b1 = getc_unlocked(f1);
		b2 = getc_unlocked(f2);
		if (b1 == EOF || b2 == EOF || b1 != b2)
			break;
	}
	funlockfile(f2);
	funlockfile(f1);

	/* EOF may also mean a read error; report that before anything else. */
	if (ferror(f1)) {
		warn("%s", fn1);
		return (2);
	}
	if (ferror(f2)) {
		warn("%s", fn2);
		return (2);
	}

	/* Identical only if both streams ran out at the same time. */
	if (b1 == EOF && b2 == EOF)
		return (0);

	printf("Binary files %s and %s differ\n", fn1, fn2);
	return (1);
}
486 
487 /*
488  * Checks whether a file appears to be a text file.
489  */
490 static int
491 istextfile(FILE *f)
492 {
493 	int	ch, i;
494 
495 	if (f == NULL)
496 		return (1);
497 	rewind(f);
498 	for (i = 0; i <= MAX_CHECK; i++) {
499 		ch = fgetc(f);
500 		if (ch == '\0') {
501 			rewind(f);
502 			return (0);
503 		}
504 		if (ch == EOF)
505 			break;
506 	}
507 	rewind(f);
508 	return (1);
509 }
510 
511 /*
512  * Prints an individual column (left or right), taking into account
513  * that tabs are variable-width.  Takes a string, the current column
514  * the cursor is on the screen, and the maximum value of the column.
515  * The column value is updated as we go along.
516  */
517 static void
518 printcol(const char *s, size_t *col, const size_t col_max)
519 {
520 
521 	for (; *s && *col < col_max; ++s) {
522 		size_t new_col;
523 
524 		switch (*s) {
525 		case '\t':
526 			/*
527 			 * If rounding to next multiple of eight causes
528 			 * an integer overflow, just return.
529 			 */
530 			if (*col > SIZE_MAX - tabsize)
531 				return;
532 
533 			/* Round to next multiple of eight. */
534 			new_col = (*col / tabsize + 1) * tabsize;
535 
536 			/*
537 			 * If printing the tab goes past the column
538 			 * width, don't print it and just quit.
539 			 */
540 			if (new_col > col_max)
541 				return;
542 
543 			if (tflag) {
544 				do {
545 					putchar(' ');
546 				} while (++*col < new_col);
547 			} else {
548 				putchar(*s);
549 				*col = new_col;
550 			}
551 			break;
552 		default:
553 			++*col;
554 			putchar(*s);
555 		}
556 	}
557 }
558 
559 /*
560  * Prompts user to either choose between two strings or edit one, both,
561  * or neither.
562  */
563 static void
564 prompt(const char *s1, const char *s2)
565 {
566 	char *cmd;
567 
568 	/* Print command prompt. */
569 	putchar('%');
570 
571 	/* Get user input. */
572 	for (; (cmd = xfgets(stdin)); free(cmd)) {
573 		const char *p;
574 
575 		/* Skip leading whitespace. */
576 		for (p = cmd; isspace((unsigned char)*p); ++p)
577 			;
578 		switch (*p) {
579 		case 'e':
580 			/* Skip `e'. */
581 			++p;
582 			if (eparse(p, s1, s2) == -1)
583 				goto USAGE;
584 			break;
585 		case 'l':
586 		case '1':
587 			/* Choose left column as-is. */
588 			if (s1 != NULL)
589 				fprintf(outfp, "%s\n", s1);
590 			/* End of command parsing. */
591 			break;
592 		case 'q':
593 			goto QUIT;
594 		case 'r':
595 		case '2':
596 			/* Choose right column as-is. */
597 			if (s2 != NULL)
598 				fprintf(outfp, "%s\n", s2);
599 			/* End of command parsing. */
600 			break;
601 		case 's':
602 			sflag = true;
603 			goto PROMPT;
604 		case 'v':
605 			sflag = false;
606 			/* FALLTHROUGH */
607 		default:
608 			/* Interactive usage help. */
609 USAGE:
610 			int_usage();
611 PROMPT:
612 			putchar('%');
613 
614 			/* Prompt user again. */
615 			continue;
616 		}
617 		free(cmd);
618 		return;
619 	}
620 
621 	/*
622 	 * If there was no error, we received an EOF from stdin, so we
623 	 * should quit.
624 	 */
625 QUIT:
626 	fclose(outfp);
627 	exit(0);
628 }
629 
630 /*
631  * Takes two strings, separated by a column divider.  NULL strings are
632  * treated as empty columns.  If the divider is the ` ' character, the
633  * second column is not printed (-l flag).  In this case, the second
634  * string must be NULL.  When the second column is NULL, the divider
635  * does not print the trailing space following the divider character.
636  *
637  * Takes into account that tabs can take multiple columns.
638  */
639 static void
640 println(const char *s1, const char divider, const char *s2)
641 {
642 	size_t col;
643 
644 	/* Print first column.  Skips if s1 == NULL. */
645 	col = 0;
646 	if (s1) {
647 		/* Skip angle bracket and space. */
648 		printcol(s1, &col, width);
649 
650 	}
651 
652 	/* Otherwise, we pad this column up to width. */
653 	for (; col < width; ++col)
654 		putchar(' ');
655 
656 	/* Only print left column. */
657 	if (divider == ' ' && !s2) {
658 		printf(" (\n");
659 		return;
660 	}
661 
662 	/*
663 	 * Print column divider.  If there is no second column, we don't
664 	 * need to add the space for padding.
665 	 */
666 	if (!s2) {
667 		printf(" %c\n", divider);
668 		return;
669 	}
670 	printf(" %c ", divider);
671 	col += 3;
672 
673 	/* Skip angle bracket and space. */
674 	printcol(s2, &col, line_width);
675 
676 	putchar('\n');
677 }
678 
/*
 * Read the next line from file.
 *
 * Returns a malloc'ed string with the trailing newline, if any,
 * removed; the caller must free it.  Returns NULL at end of file.
 * A read error is fatal (exit status 2).
 *
 * The stream's error and EOF indicators are cleared before reading.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t len;
	char *s = NULL;

	clearerr(file);

	if ((len = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * returned no line; release it instead of leaking it on
		 * every end-of-file.
		 */
		free(s);
		return (NULL);
	}

	/* Strip the newline; the last line of a file may lack one. */
	if (len > 0 && s[len - 1] == '\n')
		s[len - 1] = '\0';

	return (s);
}
705 
706 /*
707  * Parse ed commands from diffpipe and print lines from file1 (lines
708  * to change or delete) or file2 (lines to add or change).
709  * Returns EOF or 0.
710  */
711 static int
712 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
713 {
714 	size_t file1start, file1end, file2start, file2end, n;
715 	/* ed command line and pointer to characters in line */
716 	char *line, *p, *q;
717 	const char *errstr;
718 	char c, cmd;
719 
720 	/* Read ed command. */
721 	if (!(line = xfgets(diffpipe)))
722 		return (EOF);
723 
724 	p = line;
725 	/* Go to character after line number. */
726 	while (isdigit((unsigned char)*p))
727 		++p;
728 	c = *p;
729 	*p++ = 0;
730 	file1start = strtonum(line, 0, INT_MAX, &errstr);
731 	if (errstr)
732 		errx(2, "file1 start is %s: %s", errstr, line);
733 
734 	/* A range is specified for file1. */
735 	if (c == ',') {
736 		q = p;
737 		/* Go to character after file2end. */
738 		while (isdigit((unsigned char)*p))
739 			++p;
740 		c = *p;
741 		*p++ = 0;
742 		file1end = strtonum(q, 0, INT_MAX, &errstr);
743 		if (errstr)
744 			errx(2, "file1 end is %s: %s", errstr, line);
745 		if (file1start > file1end)
746 			errx(2, "invalid line range in file1: %s", line);
747 	} else
748 		file1end = file1start;
749 
750 	cmd = c;
751 	/* Check that cmd is valid. */
752 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
753 		errx(2, "ed command not recognized: %c: %s", cmd, line);
754 
755 	q = p;
756 	/* Go to character after line number. */
757 	while (isdigit((unsigned char)*p))
758 		++p;
759 	c = *p;
760 	*p++ = 0;
761 	file2start = strtonum(q, 0, INT_MAX, &errstr);
762 	if (errstr)
763 		errx(2, "file2 start is %s: %s", errstr, line);
764 
765 	/*
766 	 * There should either be a comma signifying a second line
767 	 * number or the line should just end here.
768 	 */
769 	if (c != ',' && c != '\0')
770 		errx(2, "invalid line range in file2: %c: %s", c, line);
771 
772 	if (c == ',') {
773 
774 		file2end = strtonum(p, 0, INT_MAX, &errstr);
775 		if (errstr)
776 			errx(2, "file2 end is %s: %s", errstr, line);
777 		if (file2start >= file2end)
778 			errx(2, "invalid line range in file2: %s", line);
779 	} else
780 		file2end = file2start;
781 
782 	/* Appends happen _after_ stated line. */
783 	if (cmd == 'a') {
784 		if (file1start != file1end)
785 			errx(2, "append cannot have a file1 range: %s",
786 			    line);
787 		if (file1start == SIZE_MAX)
788 			errx(2, "file1 line range too high: %s", line);
789 		file1start = ++file1end;
790 	}
791 	/*
792 	 * I'm not sure what the deal is with the line numbers for
793 	 * deletes, though.
794 	 */
795 	else if (cmd == 'd') {
796 		if (file2start != file2end)
797 			errx(2, "delete cannot have a file2 range: %s",
798 			    line);
799 		if (file2start == SIZE_MAX)
800 			errx(2, "file2 line range too high: %s", line);
801 		file2start = ++file2end;
802 	}
803 
804 	/*
805 	 * Continue reading file1 and file2 until we reach line numbers
806 	 * specified by diff.  Should only happen with -I flag.
807 	 */
808 	for (; file1ln < file1start && file2ln < file2start;
809 	    ++file1ln, ++file2ln) {
810 		char *s1, *s2;
811 
812 		if (!(s1 = xfgets(file1)))
813 			errx(2, "file1 shorter than expected");
814 		if (!(s2 = xfgets(file2)))
815 			errx(2, "file2 shorter than expected");
816 
817 		/* If the -l flag was specified, print only left column. */
818 		if (lflag) {
819 			free(s2);
820 			/*
821 			 * XXX - If -l and -I are both specified, all
822 			 * unchanged or ignored lines are shown with a
823 			 * `(' divider.  This matches GNU sdiff, but I
824 			 * believe it is a bug.  Just check out:
825 			 * gsdiff -l -I '^$' samefile samefile.
826 			 */
827 			if (Iflag)
828 				enqueue(s1, '(', NULL);
829 			else
830 				enqueue(s1, ' ', NULL);
831 		} else
832 			enqueue(s1, ' ', s2);
833 	}
834 	/* Ignore deleted lines. */
835 	for (; file1ln < file1start; ++file1ln) {
836 		char *s;
837 
838 		if (!(s = xfgets(file1)))
839 			errx(2, "file1 shorter than expected");
840 
841 		enqueue(s, '(', NULL);
842 	}
843 	/* Ignore added lines. */
844 	for (; file2ln < file2start; ++file2ln) {
845 		char *s;
846 
847 		if (!(s = xfgets(file2)))
848 			errx(2, "file2 shorter than expected");
849 
850 		/* If -l flag was given, don't print right column. */
851 		if (lflag)
852 			free(s);
853 		else
854 			enqueue(NULL, ')', s);
855 	}
856 
857 	/* Process unmodified or skipped lines. */
858 	processq();
859 
860 	switch (cmd) {
861 	case 'a':
862 		printa(file2, file2end);
863 		n = file2end - file2start + 1;
864 		break;
865 	case 'c':
866 		printc(file1, file1end, file2, file2end);
867 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
868 		break;
869 	case 'd':
870 		printd(file1, file1end);
871 		n = file1end - file1start + 1;
872 		break;
873 	default:
874 		errx(2, "invalid diff command: %c: %s", cmd, line);
875 	}
876 	free(line);
877 
878 	/* Skip to next ed line. */
879 	while (n--) {
880 		if (!(line = xfgets(diffpipe)))
881 			errx(2, "diff ended early");
882 		free(line);
883 	}
884 
885 	return (0);
886 }
887 
888 /*
889  * Queues up a diff line.
890  */
891 static void
892 enqueue(char *left, char divider, char *right)
893 {
894 	struct diffline *diffp;
895 
896 	if (!(diffp = malloc(sizeof(struct diffline))))
897 		err(2, "enqueue");
898 	diffp->left = left;
899 	diffp->div = divider;
900 	diffp->right = right;
901 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
902 }
903 
904 /*
905  * Free a diffline structure and its elements.
906  */
907 static void
908 freediff(struct diffline *diffp)
909 {
910 
911 	free(diffp->left);
912 	free(diffp->right);
913 	free(diffp);
914 }
915 
/*
 * Grow the malloc'ed string *s by a newline followed by append.  If *s
 * is NULL it simply becomes a copy of append.  Allocation failure is
 * fatal.
 *
 * The address and length of the string handled by the previous call
 * are remembered, so a run of appends to the same string does not
 * rescan it each time.
 */
static void
astrcat(char **s, const char *append)
{
	/* String handled by the previous call, and its length. */
	static const char *cached_str = NULL;
	static size_t cached_len = 0;
	size_t add_len, total;
	char *grown;

	/* Nothing to append to yet: start the string off. */
	if (*s == NULL) {
		*s = strdup(append);
		if (*s == NULL)
			err(2, "astrcat");

		cached_len = strlen(*s);
		cached_str = *s;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached_str != *s) {
		cached_len = strlen(*s);
		cached_str = *s;
	}

	/* Old text + '\n' + append + terminating NUL. */
	add_len = strlen(append);
	total = cached_len + 1 + add_len + 1;

	grown = realloc(*s, total);
	if (grown == NULL)
		err(2, "astrcat");
	*s = grown;

	/* Overwrite the old terminator, then copy append with its NUL. */
	grown[cached_len] = '\n';
	memcpy(grown + cached_len + 1, append, add_len + 1);

	/* Remember the result for the next call. */
	cached_len = total - 1;
	cached_str = grown;
}
980 
981 /*
982  * Process diff set queue, printing, prompting, and saving each diff
983  * line stored in queue.
984  */
985 static void
986 processq(void)
987 {
988 	struct diffline *diffp;
989 	char divc, *left, *right;
990 
991 	/* Don't process empty queue. */
992 	if (STAILQ_EMPTY(&diffhead))
993 		return;
994 
995 	/* Remember the divider. */
996 	divc = STAILQ_FIRST(&diffhead)->div;
997 
998 	left = NULL;
999 	right = NULL;
1000 	/*
1001 	 * Go through set of diffs, concatenating each line in left or
1002 	 * right column into two long strings, `left' and `right'.
1003 	 */
1004 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1005 		/*
1006 		 * Print changed lines if -s was given,
1007 		 * print all lines if -s was not given.
1008 		 */
1009 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1010 		    diffp->div == '>')
1011 			println(diffp->left, diffp->div, diffp->right);
1012 
1013 		/* Append new lines to diff set. */
1014 		if (diffp->left)
1015 			astrcat(&left, diffp->left);
1016 		if (diffp->right)
1017 			astrcat(&right, diffp->right);
1018 	}
1019 
1020 	/* Empty queue and free each diff line and its elements. */
1021 	while (!STAILQ_EMPTY(&diffhead)) {
1022 		diffp = STAILQ_FIRST(&diffhead);
1023 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1024 		freediff(diffp);
1025 	}
1026 
1027 	/* Write to outfp, prompting user if lines are different. */
1028 	if (outfp)
1029 		switch (divc) {
1030 		case ' ': case '(': case ')':
1031 			fprintf(outfp, "%s\n", left);
1032 			break;
1033 		case '|': case '<': case '>':
1034 			prompt(left, right);
1035 			break;
1036 		default:
1037 			errx(2, "invalid divider: %c", divc);
1038 		}
1039 
1040 	/* Free left and right. */
1041 	free(left);
1042 	free(right);
1043 }
1044 
1045 /*
1046  * Print lines following an (a)ppend command.
1047  */
1048 static void
1049 printa(FILE *file, size_t line2)
1050 {
1051 	char *line;
1052 
1053 	for (; file2ln <= line2; ++file2ln) {
1054 		if (!(line = xfgets(file)))
1055 			errx(2, "append ended early");
1056 		enqueue(NULL, '>', line);
1057 	}
1058 	processq();
1059 }
1060 
1061 /*
1062  * Print lines following a (c)hange command, from file1ln to file1end
1063  * and from file2ln to file2end.
1064  */
1065 static void
1066 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1067 {
1068 	struct fileline {
1069 		STAILQ_ENTRY(fileline)	 fileentries;
1070 		char			*line;
1071 	};
1072 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1073 
1074 	/* Read lines to be deleted. */
1075 	for (; file1ln <= file1end; ++file1ln) {
1076 		struct fileline *linep;
1077 		char *line1;
1078 
1079 		/* Read lines from both. */
1080 		if (!(line1 = xfgets(file1)))
1081 			errx(2, "error reading file1 in delete in change");
1082 
1083 		/* Add to delete queue. */
1084 		if (!(linep = malloc(sizeof(struct fileline))))
1085 			err(2, "printc");
1086 		linep->line = line1;
1087 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1088 	}
1089 
1090 	/* Process changed lines.. */
1091 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1092 	    ++file2ln) {
1093 		struct fileline *del;
1094 		char *add;
1095 
1096 		/* Get add line. */
1097 		if (!(add = xfgets(file2)))
1098 			errx(2, "error reading add in change");
1099 
1100 		del = STAILQ_FIRST(&delqhead);
1101 		enqueue(del->line, '|', add);
1102 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1103 		/*
1104 		 * Free fileline structure but not its elements since
1105 		 * they are queued up.
1106 		 */
1107 		free(del);
1108 	}
1109 	processq();
1110 
1111 	/* Process remaining lines to add. */
1112 	for (; file2ln <= file2end; ++file2ln) {
1113 		char *add;
1114 
1115 		/* Get add line. */
1116 		if (!(add = xfgets(file2)))
1117 			errx(2, "error reading add in change");
1118 
1119 		enqueue(NULL, '>', add);
1120 	}
1121 	processq();
1122 
1123 	/* Process remaining lines to delete. */
1124 	while (!STAILQ_EMPTY(&delqhead)) {
1125 		struct fileline *filep;
1126 
1127 		filep = STAILQ_FIRST(&delqhead);
1128 		enqueue(filep->line, '<', NULL);
1129 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1130 		free(filep);
1131 	}
1132 	processq();
1133 }
1134 
1135 /*
1136  * Print deleted lines from file, from file1ln to file1end.
1137  */
1138 static void
1139 printd(FILE *file1, size_t file1end)
1140 {
1141 	char *line1;
1142 
1143 	/* Print out lines file1ln to line2. */
1144 	for (; file1ln <= file1end; ++file1ln) {
1145 		if (!(line1 = xfgets(file1)))
1146 			errx(2, "file1 ended early in delete");
1147 		enqueue(line1, '<', NULL);
1148 	}
1149 	processq();
1150 }
1151 
/*
 * Print the list of interactive merge commands to stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1169 
/*
 * Print the command synopsis to stderr and exit with status 2.
 * The flag list mirrors the single-character options without
 * arguments that main() passes to getopt_long().
 */
static void
usage(void)
{

	fprintf(stderr,
	    "usage: sdiff [-aBbdEHilstW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n");
	exit(2);
}
1179