xref: /freebsd/usr.bin/sdiff/sdiff.c (revision 999c1fd64b489eda8c04f1e1529f828ebe5c7794)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 __FBSDID("$FreeBSD$");
10 
11 #include <sys/param.h>
12 #include <sys/queue.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 
17 #include <ctype.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "common.h"
31 #include "extern.h"
32 
/* diff(1) executable used unless --diff-program names another one. */
#define DIFF_PATH	"/usr/bin/diff"

/* Total output width used when -w is not given. */
#define WIDTH 126
/*
 * Smallest accepted -w value: one character for each of the two
 * columns plus the three-character separator (space, [<|>], space).
 */
#define WIDTH_MIN 5

/* istextfile() inspects at most MAX_CHECK + 1 leading characters. */
#define MAX_CHECK 768
44 
/*
 * One queued row of side-by-side output: the text of each column
 * (either may be NULL) and the divider character printed between them.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;				/* left column, or NULL */
	char	 div;				/* divider character */
	char	*right;				/* right column, or NULL */
};
52 
53 static void astrcat(char **, const char *);
54 static void enqueue(char *, char, char *);
55 static char *mktmpcpy(const char *);
56 static int istextfile(FILE *);
57 static void binexec(char *, char *, char *) __dead2;
58 static void freediff(struct diffline *);
59 static void int_usage(void);
60 static int parsecmd(FILE *, FILE *, FILE *);
61 static void printa(FILE *, size_t);
62 static void printc(FILE *, size_t, FILE *, size_t);
63 static void printcol(const char *, size_t *, const size_t);
64 static void printd(FILE *, size_t);
65 static void println(const char *, const char, const char *);
66 static void processq(void);
67 static void prompt(const char *, const char *);
68 static void usage(void) __dead2;
69 static char *xfgets(FILE *);
70 
/* Rows waiting to be printed: enqueue() appends, processq() drains. */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* full output width: two columns and divider */
static size_t width;		/* width of a single column */
static size_t file1ln, file2ln;	/* number of the next line to read from each file */
static int Iflag = 0;		/* set when -I was given */
static int	lflag;		/* -l: identical lines show only the left column */
static int	sflag;		/* -s: identical lines are not printed */
FILE *outfp;			/* merge output stream (-o); NULL when not merging */
const char *tmpdir;		/* directory for temporary copies: TMPDIR or _PATH_TMP */
80 
/*
 * Values returned by getopt_long() for options that have no
 * single-letter form.  Numbering starts just above CHAR_MAX so that
 * none of them can collide with a short option character.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	FROMFILE_OPT,
	TOFILE_OPT,
	UNIDIR_OPT,
	STRIPCR_OPT,
	HORIZ_OPT,
	LEFTC_OPT,
	SUPCL_OPT,
	LF_OPT,

	/* Order-sensitive group: keep these seven consecutive. */
	OLDGF_OPT,
	NEWGF_OPT,
	UNCGF_OPT,
	CHGF_OPT,
	OLDLF_OPT,
	NEWLF_OPT,
	UNCLF_OPT,
	/* End of the order-sensitive group. */

	TSIZE_OPT,
	HLINES_OPT,
	LFILES_OPT,
	DIFFPROG_OPT,
	PIPE_FD,
	/* Pid of the parent diff process, when there is one. */
	DIFF_PID,

	NOOP_OPT,
};
113 
114 static struct option longopts[] = {
115 	/* options only processed in sdiff */
116 	{ "left-column",		no_argument,		NULL,	LEFTC_OPT },
117 	{ "suppress-common-lines",	no_argument,		NULL,	's' },
118 	{ "width",			required_argument,	NULL,	'w' },
119 
120 	{ "output",			required_argument,	NULL,	'o' },
121 	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },
122 
123 	{ "pipe-fd",			required_argument,	NULL,	PIPE_FD },
124 	{ "diff-pid",			required_argument,	NULL,	DIFF_PID },
125 	/* Options processed by diff. */
126 	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
127 	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
128 	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
129 	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
130 	{ "help",			no_argument,		NULL,	HELP_OPT },
131 	{ "text",			no_argument,		NULL,	'a' },
132 	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
133 	{ "ignore-space-change",	no_argument,		NULL,	'b' },
134 	{ "minimal",			no_argument,		NULL,	'd' },
135 	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
136 	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
137 	{ "ignore-case",		no_argument,		NULL,	'i' },
138 	{ "expand-tabs",		no_argument,		NULL,	't' },
139 	{ "speed-large-files",		no_argument,		NULL,	'H' },
140 	{ "ignore-all-space",		no_argument,		NULL,	'W' },
141 
142 	{ NULL,				0,			NULL,	'\0'}
143 };
144 
/*
 * Text printed, one entry per line, for --help.  The array is
 * NULL-terminated.  Long option names match the spellings in longopts.
 */
static const char *help_msg[] = {
	"\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"\t-l, --left-column, Only print the left column for identical lines.",
	"\t-o OUTFILE, --output=OUTFILE, Interactively merge file1 and file2 into outfile.",
	"\t-s, --suppress-common-lines, Skip identical lines.",
	"\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.",
	"\tOptions passed to diff(1) are:",
	"\t\t-a, --text, Treat file1 and file2 as text files.",
	"\t\t-b, --ignore-space-change, Ignore trailing blank spaces.",
	"\t\t-d, --minimal, Minimize diff size.",
	"\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.",
	"\t\t-i, --ignore-case, Do a case-insensitive comparison.",
	"\t\t-t, --expand-tabs, Expand tabs to spaces.",
	"\t\t-W, --ignore-all-space, Ignore all spaces.",
	"\t\t--speed-large-files, Assume large file with scattered changes.",
	"\t\t--strip-trailing-cr, Strip trailing carriage return.",
	"\t\t--ignore-file-name-case, Ignore case of file names.",
	"\t\t--no-ignore-file-name-case, Do not ignore file name case",
	"\t\t--tabsize NUM, Change size of tabs (default 8.)",

	NULL,
};
167 
168 /*
169  * Create temporary file if source_file is not a regular file.
170  * Returns temporary file name if one was malloced, NULL if unnecessary.
171  */
172 static char *
173 mktmpcpy(const char *source_file)
174 {
175 	struct stat sb;
176 	ssize_t rcount;
177 	int ifd, ofd;
178 	u_char buf[BUFSIZ];
179 	char *target_file;
180 
181 	/* Open input and output. */
182 	ifd = open(source_file, O_RDONLY, 0);
183 	/* File was opened successfully. */
184 	if (ifd != -1) {
185 		if (fstat(ifd, &sb) == -1)
186 			err(2, "error getting file status from %s", source_file);
187 
188 		/* Regular file. */
189 		if (S_ISREG(sb.st_mode)) {
190 			close(ifd);
191 			return (NULL);
192 		}
193 	} else {
194 		/* If ``-'' does not exist the user meant stdin. */
195 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
196 			ifd = STDIN_FILENO;
197 		else
198 			err(2, "error opening %s", source_file);
199 	}
200 
201 	/* Not a regular file, so copy input into temporary file. */
202 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
203 		err(2, "asprintf");
204 	if ((ofd = mkstemp(target_file)) == -1) {
205 		warn("error opening %s", target_file);
206 		goto FAIL;
207 	}
208 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
209 	    rcount != 0) {
210 		ssize_t wcount;
211 
212 		wcount = write(ofd, buf, (size_t)rcount);
213 		if (-1 == wcount || rcount != wcount) {
214 			warn("error writing to %s", target_file);
215 			goto FAIL;
216 		}
217 	}
218 	if (rcount == -1) {
219 		warn("error reading from %s", source_file);
220 		goto FAIL;
221 	}
222 
223 	close(ifd);
224 	close(ofd);
225 
226 	return (target_file);
227 
228 FAIL:
229 	unlink(target_file);
230 	exit(2);
231 }
232 
233 int
234 main(int argc, char **argv)
235 {
236 	FILE *diffpipe=NULL, *file1, *file2;
237 	size_t diffargc = 0, wflag = WIDTH;
238 	int ch, fd[2] = {-1}, status;
239 	pid_t pid=0; pid_t ppid =-1;
240 	const char *outfile = NULL;
241 	struct option *popt;
242 	char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
243 	     *tmp1, *tmp2, *s1, *s2;
244 	int i;
245 
246 	/*
247 	 * Process diff flags.
248 	 */
249 	/*
250 	 * Allocate memory for diff arguments and NULL.
251 	 * Each flag has at most one argument, so doubling argc gives an
252 	 * upper limit of how many diff args can be passed.  argv[0],
253 	 * file1, and file2 won't have arguments so doubling them will
254 	 * waste some memory; however we need an extra space for the
255 	 * NULL at the end, so it sort of works out.
256 	 */
257 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
258 		err(2, "main");
259 
260 	/* Add first argument, the program name. */
261 	diffargv[diffargc++] = diffprog;
262 
263 	/* create a dynamic string for merging single-switch options */
264 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
265 		err(2, "main");
266 
267 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
268 	    longopts, NULL)) != -1) {
269 		const char *errstr;
270 
271 		switch (ch) {
272 		/* only compatible --long-name-form with diff */
273 		case FCASE_IGNORE_OPT:
274 		case FCASE_SENSITIVE_OPT:
275 		case STRIPCR_OPT:
276 		case TSIZE_OPT:
277 		case 'S':
278 		break;
279 		/* combine no-arg single switches */
280 		case 'a':
281 		case 'B':
282 		case 'b':
283 		case 'd':
284 		case 'E':
285 		case 'i':
286 		case 't':
287 		case 'H':
288 		case 'W':
289 			for(popt = longopts; ch != popt->val && popt->name != NULL; popt++);
290 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
291 			/*
292 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
293 			 */
294 			if (ch == 'W')
295 				sprintf(diffargv[1], "%sw", diffargv[1]);
296 			else
297 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
298 			break;
299 		case DIFFPROG_OPT:
300 			diffargv[0] = diffprog = optarg;
301 			break;
302 		case 'I':
303 			Iflag = 1;
304 			diffargv[diffargc++] = "-I";
305 			diffargv[diffargc++] = optarg;
306 			break;
307 		case 'l':
308 			lflag = 1;
309 			break;
310 		case 'o':
311 			outfile = optarg;
312 			break;
313 		case 's':
314 			sflag = 1;
315 			break;
316 		case 'w':
317 			wflag = strtonum(optarg, WIDTH_MIN,
318 			    INT_MAX, &errstr);
319 			if (errstr)
320 				errx(2, "width is %s: %s", errstr, optarg);
321 			break;
322 		case DIFF_PID:
323 			ppid = strtonum(optarg, 0, INT_MAX, &errstr);
324 			if (errstr)
325 				errx(2, "diff pid value is %s: %s", errstr, optarg);
326 			break;
327 		case HELP_OPT:
328 			for (i = 0; help_msg[i] != NULL; i++)
329 				printf("%s\n", help_msg[i]);
330 			exit(0);
331 			break;
332 		default:
333 			usage();
334 			break;
335 		}
336 	}
337 
338 	/* no single switches were used */
339 	if (strcmp(diffargv[1], "-") == 0 ) {
340 		for ( i = 1; i < argc-1; i++) {
341 			diffargv[i] = diffargv[i+1];
342 		}
343 		diffargv[diffargc-1] = NULL;
344 		diffargc--;
345 	}
346 
347 	argc -= optind;
348 	argv += optind;
349 
350 	if (argc != 2)
351 		usage();
352 
353 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
354 		err(2, "could not open: %s", optarg);
355 
356 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
357 		tmpdir = _PATH_TMP;
358 
359 	filename1 = argv[0];
360 	filename2 = argv[1];
361 
362 	/*
363 	 * Create temporary files for diff and sdiff to share if file1
364 	 * or file2 are not regular files.  This allows sdiff and diff
365 	 * to read the same inputs if one or both inputs are stdin.
366 	 *
367 	 * If any temporary files were created, their names would be
368 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
369 	 */
370 	tmp1 = tmp2 = NULL;
371 	/* file1 and file2 are the same, so copy to same temp file. */
372 	if (strcmp(filename1, filename2) == 0) {
373 		if ((tmp1 = mktmpcpy(filename1)))
374 			filename1 = filename2 = tmp1;
375 	/* Copy file1 and file2 into separate temp files. */
376 	} else {
377 		if ((tmp1 = mktmpcpy(filename1)))
378 			filename1 = tmp1;
379 		if ((tmp2 = mktmpcpy(filename2)))
380 			filename2 = tmp2;
381 	}
382 
383 	diffargv[diffargc++] = filename1;
384 	diffargv[diffargc++] = filename2;
385 	/* Add NULL to end of array to indicate end of array. */
386 	diffargv[diffargc++] = NULL;
387 
388 	/* Subtract column divider and divide by two. */
389 	width = (wflag - 3) / 2;
390 	/* Make sure line_width can fit in size_t. */
391 	if (width > (SIZE_MAX - 3) / 2)
392 		errx(2, "width is too large: %zu", width);
393 	line_width = width * 2 + 3;
394 
395 	if (ppid == -1 ) {
396 		if (pipe(fd))
397 			err(2, "pipe");
398 
399 		switch (pid = fork()) {
400 		case 0:
401 			/* child */
402 			/* We don't read from the pipe. */
403 			close(fd[0]);
404 			if (dup2(fd[1], STDOUT_FILENO) == -1)
405 				err(2, "child could not duplicate descriptor");
406 			/* Free unused descriptor. */
407 			close(fd[1]);
408 			execvp(diffprog, diffargv);
409 			err(2, "could not execute diff: %s", diffprog);
410 			break;
411 		case -1:
412 			err(2, "could not fork");
413 			break;
414 		}
415 
416 		/* parent */
417 		/* We don't write to the pipe. */
418 		close(fd[1]);
419 
420 		/* Open pipe to diff command. */
421 		if ((diffpipe = fdopen(fd[0], "r")) == NULL)
422 			err(2, "could not open diff pipe");
423 	}
424 	if ((file1 = fopen(filename1, "r")) == NULL)
425 		err(2, "could not open %s", filename1);
426 	if ((file2 = fopen(filename2, "r")) == NULL)
427 		err(2, "could not open %s", filename2);
428 	if (!istextfile(file1) || !istextfile(file2)) {
429 		/* Close open files and pipe, delete temps */
430 		fclose(file1);
431 		fclose(file2);
432 		if (diffpipe != NULL)
433 			fclose(diffpipe);
434 		if (tmp1)
435 			if (unlink(tmp1))
436 				warn("Error deleting %s.", tmp1);
437 		if (tmp2)
438 			if (unlink(tmp2))
439 				warn("Error deleting %s.", tmp2);
440 		free(tmp1);
441 		free(tmp2);
442 		binexec(diffprog, filename1, filename2);
443 	}
444 	/* Line numbers start at one. */
445 	file1ln = file2ln = 1;
446 
447 	/* Read and parse diff output. */
448 	while (parsecmd(diffpipe, file1, file2) != EOF)
449 		;
450 	fclose(diffpipe);
451 
452 	/* Wait for diff to exit. */
453 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
454 	    WEXITSTATUS(status) >= 2)
455 		err(2, "diff exited abnormally.");
456 
457 	/* Delete and free unneeded temporary files. */
458 	if (tmp1)
459 		if (unlink(tmp1))
460 			warn("Error deleting %s.", tmp1);
461 	if (tmp2)
462 		if (unlink(tmp2))
463 			warn("Error deleting %s.", tmp2);
464 	free(tmp1);
465 	free(tmp2);
466 	filename1 = filename2 = tmp1 = tmp2 = NULL;
467 
468 	/* No more diffs, so print common lines. */
469 	if (lflag)
470 		while ((s1 = xfgets(file1)))
471 			enqueue(s1, ' ', NULL);
472 	else
473 		for (;;) {
474 			s1 = xfgets(file1);
475 			s2 = xfgets(file2);
476 			if (s1 || s2)
477 				enqueue(s1, ' ', s2);
478 			else
479 				break;
480 		}
481 	fclose(file1);
482 	fclose(file2);
483 	/* Process unmodified lines. */
484 	processq();
485 
486 	/* Return diff exit status. */
487 	return (WEXITSTATUS(status));
488 }
489 
/*
 * When sdiff/zsdiff detects a binary file as input, replace the current
 * process with diff/zdiff run on the two files, to maintain the same
 * behavior as GNU sdiff with binary input.
 *
 * diffprog is looked up with execvp(), the same way main() starts diff
 * for text input.  Never returns: if the exec fails the reason is
 * reported and the program exits with status 2, so that the failure is
 * not mistaken for diff's "files differ" status of 1.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = {diffprog, f1, f2, NULL};

	execvp(diffprog, args);

	/* Only reached when the exec failed; errno says why. */
	err(2, "could not execute diff: %s", diffprog);
}
504 
505 /*
506  * Checks whether a file appears to be a text file.
507  */
508 static int
509 istextfile(FILE *f)
510 {
511 	int	ch, i;
512 
513 	if (f == NULL)
514 		return (1);
515 	rewind(f);
516 	for (i = 0; i <= MAX_CHECK; i++) {
517 		ch = fgetc(f);
518 		if (ch == '\0') {
519 			rewind(f);
520 			return (0);
521 		}
522 		if (ch == EOF)
523 			break;
524 	}
525 	rewind(f);
526 	return (1);
527 }
528 
/*
 * Write string s to stdout without letting the cursor pass col_max.
 * *col is the current screen column and is advanced as characters are
 * written; a tab moves it to the next multiple of eight.  Output stops
 * at the end of s, when *col reaches col_max, or at a tab that would
 * not fit (the tab itself is then not written).
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;

	for (cp = s; *cp != '\0'; cp++) {
		if (*col >= col_max)
			break;

		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up must not overflow size_t. */
			if (*col > SIZE_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* A tab reaching past the limit is not printed. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
568 
569 /*
570  * Prompts user to either choose between two strings or edit one, both,
571  * or neither.
572  */
573 static void
574 prompt(const char *s1, const char *s2)
575 {
576 	char *cmd;
577 
578 	/* Print command prompt. */
579 	putchar('%');
580 
581 	/* Get user input. */
582 	for (; (cmd = xfgets(stdin)); free(cmd)) {
583 		const char *p;
584 
585 		/* Skip leading whitespace. */
586 		for (p = cmd; isspace(*p); ++p)
587 			;
588 		switch (*p) {
589 		case 'e':
590 			/* Skip `e'. */
591 			++p;
592 			if (eparse(p, s1, s2) == -1)
593 				goto USAGE;
594 			break;
595 		case 'l':
596 		case '1':
597 			/* Choose left column as-is. */
598 			if (s1 != NULL)
599 				fprintf(outfp, "%s\n", s1);
600 			/* End of command parsing. */
601 			break;
602 		case 'q':
603 			goto QUIT;
604 		case 'r':
605 		case '2':
606 			/* Choose right column as-is. */
607 			if (s2 != NULL)
608 				fprintf(outfp, "%s\n", s2);
609 			/* End of command parsing. */
610 			break;
611 		case 's':
612 			sflag = 1;
613 			goto PROMPT;
614 		case 'v':
615 			sflag = 0;
616 			/* FALLTHROUGH */
617 		default:
618 			/* Interactive usage help. */
619 USAGE:
620 			int_usage();
621 PROMPT:
622 			putchar('%');
623 
624 			/* Prompt user again. */
625 			continue;
626 		}
627 		free(cmd);
628 		return;
629 	}
630 
631 	/*
632 	 * If there was no error, we received an EOF from stdin, so we
633 	 * should quit.
634 	 */
635 QUIT:
636 	fclose(outfp);
637 	exit(0);
638 }
639 
640 /*
641  * Takes two strings, separated by a column divider.  NULL strings are
642  * treated as empty columns.  If the divider is the ` ' character, the
643  * second column is not printed (-l flag).  In this case, the second
644  * string must be NULL.  When the second column is NULL, the divider
645  * does not print the trailing space following the divider character.
646  *
647  * Takes into account that tabs can take multiple columns.
648  */
649 static void
650 println(const char *s1, const char div, const char *s2)
651 {
652 	size_t col;
653 
654 	/* Print first column.  Skips if s1 == NULL. */
655 	col = 0;
656 	if (s1) {
657 		/* Skip angle bracket and space. */
658 		printcol(s1, &col, width);
659 
660 	}
661 
662 	/* Otherwise, we pad this column up to width. */
663 	for (; col < width; ++col)
664 		putchar(' ');
665 
666 	/* Only print left column. */
667 	if (div == ' ' && !s2) {
668 		printf(" (\n");
669 		return;
670 	}
671 
672 	/*
673 	 * Print column divider.  If there is no second column, we don't
674 	 * need to add the space for padding.
675 	 */
676 	if (!s2) {
677 		printf(" %c\n", div);
678 		return;
679 	}
680 	printf(" %c ", div);
681 	col += 3;
682 
683 	/* Skip angle bracket and space. */
684 	printcol(s2, &col, line_width);
685 
686 	putchar('\n');
687 }
688 
/*
 * Read the next line from file and return it as a newly allocated
 * string with the trailing newline, if any, removed.  The caller frees
 * the result.  Returns NULL at end of file; a read error terminates
 * the program with exit status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may have allocated a buffer even though it failed. */
		free(s);
		return (NULL);
	}

	/* A successful getline() returns at least one character. */
	if (s[l-1] == '\n')
		s[l-1] = '\0';

	return (s);
}
715 
716 /*
717  * Parse ed commands from diffpipe and print lines from file1 (lines
718  * to change or delete) or file2 (lines to add or change).
719  * Returns EOF or 0.
720  */
721 static int
722 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
723 {
724 	size_t file1start, file1end, file2start, file2end, n;
725 	/* ed command line and pointer to characters in line */
726 	char *line, *p, *q;
727 	const char *errstr;
728 	char c, cmd;
729 
730 	/* Read ed command. */
731 	if (!(line = xfgets(diffpipe)))
732 		return (EOF);
733 
734 	p = line;
735 	/* Go to character after line number. */
736 	while (isdigit(*p))
737 		++p;
738 	c = *p;
739 	*p++ = 0;
740 	file1start = strtonum(line, 0, INT_MAX, &errstr);
741 	if (errstr)
742 		errx(2, "file1 start is %s: %s", errstr, line);
743 
744 	/* A range is specified for file1. */
745 	if (c == ',') {
746 		q = p;
747 		/* Go to character after file2end. */
748 		while (isdigit(*p))
749 			++p;
750 		c = *p;
751 		*p++ = 0;
752 		file1end = strtonum(q, 0, INT_MAX, &errstr);
753 		if (errstr)
754 			errx(2, "file1 end is %s: %s", errstr, line);
755 		if (file1start > file1end)
756 			errx(2, "invalid line range in file1: %s", line);
757 	} else
758 		file1end = file1start;
759 
760 	cmd = c;
761 	/* Check that cmd is valid. */
762 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
763 		errx(2, "ed command not recognized: %c: %s", cmd, line);
764 
765 	q = p;
766 	/* Go to character after line number. */
767 	while (isdigit(*p))
768 		++p;
769 	c = *p;
770 	*p++ = 0;
771 	file2start = strtonum(q, 0, INT_MAX, &errstr);
772 	if (errstr)
773 		errx(2, "file2 start is %s: %s", errstr, line);
774 
775 	/*
776 	 * There should either be a comma signifying a second line
777 	 * number or the line should just end here.
778 	 */
779 	if (c != ',' && c != '\0')
780 		errx(2, "invalid line range in file2: %c: %s", c, line);
781 
782 	if (c == ',') {
783 
784 		file2end = strtonum(p, 0, INT_MAX, &errstr);
785 		if (errstr)
786 			errx(2, "file2 end is %s: %s", errstr, line);
787 		if (file2start >= file2end)
788 			errx(2, "invalid line range in file2: %s", line);
789 	} else
790 		file2end = file2start;
791 
792 	/* Appends happen _after_ stated line. */
793 	if (cmd == 'a') {
794 		if (file1start != file1end)
795 			errx(2, "append cannot have a file1 range: %s",
796 			    line);
797 		if (file1start == SIZE_MAX)
798 			errx(2, "file1 line range too high: %s", line);
799 		file1start = ++file1end;
800 	}
801 	/*
802 	 * I'm not sure what the deal is with the line numbers for
803 	 * deletes, though.
804 	 */
805 	else if (cmd == 'd') {
806 		if (file2start != file2end)
807 			errx(2, "delete cannot have a file2 range: %s",
808 			    line);
809 		if (file2start == SIZE_MAX)
810 			errx(2, "file2 line range too high: %s", line);
811 		file2start = ++file2end;
812 	}
813 
814 	/*
815 	 * Continue reading file1 and file2 until we reach line numbers
816 	 * specified by diff.  Should only happen with -I flag.
817 	 */
818 	for (; file1ln < file1start && file2ln < file2start;
819 	    ++file1ln, ++file2ln) {
820 		char *s1, *s2;
821 
822 		if (!(s1 = xfgets(file1)))
823 			errx(2, "file1 shorter than expected");
824 		if (!(s2 = xfgets(file2)))
825 			errx(2, "file2 shorter than expected");
826 
827 		/* If the -l flag was specified, print only left column. */
828 		if (lflag) {
829 			free(s2);
830 			/*
831 			 * XXX - If -l and -I are both specified, all
832 			 * unchanged or ignored lines are shown with a
833 			 * `(' divider.  This matches GNU sdiff, but I
834 			 * believe it is a bug.  Just check out:
835 			 * gsdiff -l -I '^$' samefile samefile.
836 			 */
837 			if (Iflag)
838 				enqueue(s1, '(', NULL);
839 			else
840 				enqueue(s1, ' ', NULL);
841 		} else
842 			enqueue(s1, ' ', s2);
843 	}
844 	/* Ignore deleted lines. */
845 	for (; file1ln < file1start; ++file1ln) {
846 		char *s;
847 
848 		if (!(s = xfgets(file1)))
849 			errx(2, "file1 shorter than expected");
850 
851 		enqueue(s, '(', NULL);
852 	}
853 	/* Ignore added lines. */
854 	for (; file2ln < file2start; ++file2ln) {
855 		char *s;
856 
857 		if (!(s = xfgets(file2)))
858 			errx(2, "file2 shorter than expected");
859 
860 		/* If -l flag was given, don't print right column. */
861 		if (lflag)
862 			free(s);
863 		else
864 			enqueue(NULL, ')', s);
865 	}
866 
867 	/* Process unmodified or skipped lines. */
868 	processq();
869 
870 	switch (cmd) {
871 	case 'a':
872 		printa(file2, file2end);
873 		n = file2end - file2start + 1;
874 		break;
875 	case 'c':
876 		printc(file1, file1end, file2, file2end);
877 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
878 		break;
879 	case 'd':
880 		printd(file1, file1end);
881 		n = file1end - file1start + 1;
882 		break;
883 	default:
884 		errx(2, "invalid diff command: %c: %s", cmd, line);
885 	}
886 	free(line);
887 
888 	/* Skip to next ed line. */
889 	while (n--) {
890 		if (!(line = xfgets(diffpipe)))
891 			errx(2, "diff ended early");
892 		free(line);
893 	}
894 
895 	return (0);
896 }
897 
898 /*
899  * Queues up a diff line.
900  */
901 static void
902 enqueue(char *left, char div, char *right)
903 {
904 	struct diffline *diffp;
905 
906 	if (!(diffp = malloc(sizeof(struct diffline))))
907 		err(2, "enqueue");
908 	diffp->left = left;
909 	diffp->div = div;
910 	diffp->right = right;
911 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
912 }
913 
914 /*
915  * Free a diffline structure and its elements.
916  */
917 static void
918 freediff(struct diffline *diffp)
919 {
920 
921 	free(diffp->left);
922 	free(diffp->right);
923 	free(diffp);
924 }
925 
/*
 * Append `append' to the heap string *s, separated by a newline.  A
 * NULL *s simply becomes a copy of `append'.  *s may be moved by the
 * reallocation.
 *
 * The pointer and length produced by the previous call are remembered,
 * so repeated appends to the same string do not rescan it.  Allocation
 * failure terminates the program with exit status 2.
 */
static void
astrcat(char **s, const char *append)
{
	/* Result of the previous call and its length. */
	static const char *cached = NULL;
	static size_t cachedlen = 0;
	size_t addlen, total;
	char *buf;

	addlen = strlen(append);

	/* Nothing to append to yet: start the string with a copy. */
	if (*s == NULL) {
		buf = malloc(addlen + 1);
		if (buf == NULL)
			err(2, "astrcat");
		memcpy(buf, append, addlen + 1);
		*s = buf;

		cached = buf;
		cachedlen = addlen;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached != *s)
		cachedlen = strlen(*s);

	/* Old text + '\n' + appended text + terminating NUL. */
	total = cachedlen + 1 + addlen + 1;

	buf = realloc(*s, total);
	if (buf == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text at the old end. */
	buf[cachedlen] = '\n';
	memcpy(buf + cachedlen + 1, append, addlen + 1);

	*s = buf;
	cached = buf;
	cachedlen = total - 1;
}
990 
991 /*
992  * Process diff set queue, printing, prompting, and saving each diff
993  * line stored in queue.
994  */
995 static void
996 processq(void)
997 {
998 	struct diffline *diffp;
999 	char divc, *left, *right;
1000 
1001 	/* Don't process empty queue. */
1002 	if (STAILQ_EMPTY(&diffhead))
1003 		return;
1004 
1005 	/* Remember the divider. */
1006 	divc = STAILQ_FIRST(&diffhead)->div;
1007 
1008 	left = NULL;
1009 	right = NULL;
1010 	/*
1011 	 * Go through set of diffs, concatenating each line in left or
1012 	 * right column into two long strings, `left' and `right'.
1013 	 */
1014 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1015 		/*
1016 		 * Print changed lines if -s was given,
1017 		 * print all lines if -s was not given.
1018 		 */
1019 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1020 		    diffp->div == '>')
1021 			println(diffp->left, diffp->div, diffp->right);
1022 
1023 		/* Append new lines to diff set. */
1024 		if (diffp->left)
1025 			astrcat(&left, diffp->left);
1026 		if (diffp->right)
1027 			astrcat(&right, diffp->right);
1028 	}
1029 
1030 	/* Empty queue and free each diff line and its elements. */
1031 	while (!STAILQ_EMPTY(&diffhead)) {
1032 		diffp = STAILQ_FIRST(&diffhead);
1033 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1034 		freediff(diffp);
1035 	}
1036 
1037 	/* Write to outfp, prompting user if lines are different. */
1038 	if (outfp)
1039 		switch (divc) {
1040 		case ' ': case '(': case ')':
1041 			fprintf(outfp, "%s\n", left);
1042 			break;
1043 		case '|': case '<': case '>':
1044 			prompt(left, right);
1045 			break;
1046 		default:
1047 			errx(2, "invalid divider: %c", divc);
1048 		}
1049 
1050 	/* Free left and right. */
1051 	free(left);
1052 	free(right);
1053 }
1054 
1055 /*
1056  * Print lines following an (a)ppend command.
1057  */
1058 static void
1059 printa(FILE *file, size_t line2)
1060 {
1061 	char *line;
1062 
1063 	for (; file2ln <= line2; ++file2ln) {
1064 		if (!(line = xfgets(file)))
1065 			errx(2, "append ended early");
1066 		enqueue(NULL, '>', line);
1067 	}
1068 	processq();
1069 }
1070 
1071 /*
1072  * Print lines following a (c)hange command, from file1ln to file1end
1073  * and from file2ln to file2end.
1074  */
1075 static void
1076 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1077 {
1078 	struct fileline {
1079 		STAILQ_ENTRY(fileline)	 fileentries;
1080 		char			*line;
1081 	};
1082 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1083 
1084 	/* Read lines to be deleted. */
1085 	for (; file1ln <= file1end; ++file1ln) {
1086 		struct fileline *linep;
1087 		char *line1;
1088 
1089 		/* Read lines from both. */
1090 		if (!(line1 = xfgets(file1)))
1091 			errx(2, "error reading file1 in delete in change");
1092 
1093 		/* Add to delete queue. */
1094 		if (!(linep = malloc(sizeof(struct fileline))))
1095 			err(2, "printc");
1096 		linep->line = line1;
1097 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1098 	}
1099 
1100 	/* Process changed lines.. */
1101 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1102 	    ++file2ln) {
1103 		struct fileline *del;
1104 		char *add;
1105 
1106 		/* Get add line. */
1107 		if (!(add = xfgets(file2)))
1108 			errx(2, "error reading add in change");
1109 
1110 		del = STAILQ_FIRST(&delqhead);
1111 		enqueue(del->line, '|', add);
1112 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1113 		/*
1114 		 * Free fileline structure but not its elements since
1115 		 * they are queued up.
1116 		 */
1117 		free(del);
1118 	}
1119 	processq();
1120 
1121 	/* Process remaining lines to add. */
1122 	for (; file2ln <= file2end; ++file2ln) {
1123 		char *add;
1124 
1125 		/* Get add line. */
1126 		if (!(add = xfgets(file2)))
1127 			errx(2, "error reading add in change");
1128 
1129 		enqueue(NULL, '>', add);
1130 	}
1131 	processq();
1132 
1133 	/* Process remaining lines to delete. */
1134 	while (!STAILQ_EMPTY(&delqhead)) {
1135 		struct fileline *filep;
1136 
1137 		filep = STAILQ_FIRST(&delqhead);
1138 		enqueue(filep->line, '<', NULL);
1139 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1140 		free(filep);
1141 	}
1142 	processq();
1143 }
1144 
1145 /*
1146  * Print deleted lines from file, from file1ln to file1end.
1147  */
1148 static void
1149 printd(FILE *file1, size_t file1end)
1150 {
1151 	char *line1;
1152 
1153 	/* Print out lines file1ln to line2. */
1154 	for (; file1ln <= file1end; ++file1ln) {
1155 		if (!(line1 = xfgets(file1)))
1156 			errx(2, "file1 ended early in delete");
1157 		enqueue(line1, '<', NULL);
1158 	}
1159 	processq();
1160 }
1161 
/*
 * Print the list of interactive merge commands to stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1179 
/*
 * Print the command-line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] "
	    "file1 file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1189