xref: /freebsd/usr.bin/sdiff/sdiff.c (revision 87d5d10d7d7af72e5529b1cd684f8da41664183b)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 __FBSDID("$FreeBSD$");
10 
11 #include <sys/param.h>
12 #include <sys/queue.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 
17 #include <ctype.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "common.h"
31 #include "extern.h"
32 
/* Program executed to compute the differences unless overridden. */
#define DIFF_PATH	"/usr/bin/diff"

/* Total output line width used when -w is not given. */
#define WIDTH		126
/*
 * Smallest accepted line width: one character for each of the two
 * columns plus the three-character gutter (space, [<|>], space).
 */
#define WIDTH_MIN	5

/* Loop bound used by istextfile() when scanning a file for NUL bytes. */
#define MAX_CHECK	768
44 
/*
 * One row of side-by-side output: the text for each column (either
 * pointer may be NULL) and the divider character placed between them.
 * Rows are linked together on a singly-linked tail queue.
 */
struct diffline {
	STAILQ_ENTRY(diffline)	 diffentries;	/* queue linkage */
	char			*left;		/* left column text */
	char			 div;		/* divider character */
	char			*right;		/* right column text */
};
52 
53 static void astrcat(char **, const char *);
54 static void enqueue(char *, char, char *);
55 static char *mktmpcpy(const char *);
56 static int istextfile(FILE *);
57 static void binexec(char *, char *, char *) __dead2;
58 static void freediff(struct diffline *);
59 static void int_usage(void);
60 static int parsecmd(FILE *, FILE *, FILE *);
61 static void printa(FILE *, size_t);
62 static void printc(FILE *, size_t, FILE *, size_t);
63 static void printcol(const char *, size_t *, const size_t);
64 static void printd(FILE *, size_t);
65 static void println(const char *, const char, const char *);
66 static void processq(void);
67 static void prompt(const char *, const char *);
68 static void usage(void) __dead2;
69 static char *xfgets(FILE *);
70 
/* Queue of output rows waiting to be printed by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);

static size_t	 line_width;	/* width of a line (two columns and divider) */
static size_t	 width;		/* width of each column */
static size_t	 file1ln;	/* line number of file1 */
static size_t	 file2ln;	/* line number of file2 */
static int	 Iflag = 0;	/* ignore sets matching regexp */
static int	 lflag;		/* print only left column for identical lines */
static int	 sflag;		/* skip identical lines */

/* The two objects below are deliberately left with external linkage. */
FILE		*outfp;		/* file to save changes to */
const char	*tmpdir;	/* TMPDIR or /tmp */
80 
/*
 * Return values for long options that have no single-letter equivalent.
 * They start above CHAR_MAX so they can never collide with a short
 * option character returned by getopt_long().
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	FROMFILE_OPT,
	TOFILE_OPT,
	UNIDIR_OPT,
	STRIPCR_OPT,
	HORIZ_OPT,
	LEFTC_OPT,
	SUPCL_OPT,
	LF_OPT,
	/* the following groupings must be in sequence */
	OLDGF_OPT,
	NEWGF_OPT,
	UNCGF_OPT,
	CHGF_OPT,
	OLDLF_OPT,
	NEWLF_OPT,
	UNCLF_OPT,
	/* end order-sensitive enums */
	TSIZE_OPT,
	HLINES_OPT,
	LFILES_OPT,
	DIFFPROG_OPT,

	NOOP_OPT,
};

/*
 * Long option table handed to getopt_long().  Options that have a short
 * form return that character so both spellings share one case in the
 * option switch.
 */
static struct option longopts[] = {
	/* options only processed in sdiff */
	/*
	 * --left-column returns 'l' like its short form; the option switch
	 * has no case for LEFTC_OPT, so returning that value would make
	 * the long spelling fall through to the usage error.
	 */
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/* Options processed by diff. */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* Terminator required by getopt_long(). */
	{ NULL,				0,			NULL,	'\0'}
};
139 
/*
 * Text printed line by line for --help.  The array is NULL-terminated.
 * Option spellings here must agree with the long option table.
 */
static const char *help_msg[] = {
	"\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"\t-l, --left-column, Only print the left column for identical lines.",
	"\t-o OUTFILE, --output=OUTFILE, Interactively merge file1 and file2 into outfile.",
	"\t-s, --suppress-common-lines, Skip identical lines.",
	"\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.",
	"\tOptions passed to diff(1) are:",
	"\t\t-a, --text, Treat file1 and file2 as text files.",
	"\t\t-b, --ignore-space-change, Ignore trailing blank spaces.",
	"\t\t-d, --minimal, Minimize diff size.",
	"\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.",
	"\t\t-i, --ignore-case, Do a case-insensitive comparison.",
	"\t\t-t, --expand-tabs, Expand tabs to spaces.",
	"\t\t-W, --ignore-all-space, Ignore all spaces.",
	"\t\t--speed-large-files, Assume large file with scattered changes.",
	"\t\t--strip-trailing-cr, Strip trailing carriage return.",
	"\t\t--ignore-file-name-case, Ignore case of file names.",
	"\t\t--no-ignore-file-name-case, Do not ignore file name case.",
	"\t\t--tabsize NUM, Change size of tabs (default 8.)",

	NULL,
};
162 
163 /*
164  * Create temporary file if source_file is not a regular file.
165  * Returns temporary file name if one was malloced, NULL if unnecessary.
166  */
167 static char *
168 mktmpcpy(const char *source_file)
169 {
170 	struct stat sb;
171 	ssize_t rcount;
172 	int ifd, ofd;
173 	u_char buf[BUFSIZ];
174 	char *target_file;
175 
176 	/* Open input and output. */
177 	ifd = open(source_file, O_RDONLY, 0);
178 	/* File was opened successfully. */
179 	if (ifd != -1) {
180 		if (fstat(ifd, &sb) == -1)
181 			err(2, "error getting file status from %s", source_file);
182 
183 		/* Regular file. */
184 		if (S_ISREG(sb.st_mode)) {
185 			close(ifd);
186 			return (NULL);
187 		}
188 	} else {
189 		/* If ``-'' does not exist the user meant stdin. */
190 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
191 			ifd = STDIN_FILENO;
192 		else
193 			err(2, "error opening %s", source_file);
194 	}
195 
196 	/* Not a regular file, so copy input into temporary file. */
197 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
198 		err(2, "asprintf");
199 	if ((ofd = mkstemp(target_file)) == -1) {
200 		warn("error opening %s", target_file);
201 		goto FAIL;
202 	}
203 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
204 	    rcount != 0) {
205 		ssize_t wcount;
206 
207 		wcount = write(ofd, buf, (size_t)rcount);
208 		if (-1 == wcount || rcount != wcount) {
209 			warn("error writing to %s", target_file);
210 			goto FAIL;
211 		}
212 	}
213 	if (rcount == -1) {
214 		warn("error reading from %s", source_file);
215 		goto FAIL;
216 	}
217 
218 	close(ifd);
219 	close(ofd);
220 
221 	return (target_file);
222 
223 FAIL:
224 	unlink(target_file);
225 	exit(2);
226 }
227 
228 int
229 main(int argc, char **argv)
230 {
231 	FILE *diffpipe=NULL, *file1, *file2;
232 	size_t diffargc = 0, wflag = WIDTH;
233 	int ch, fd[2] = {-1}, status;
234 	pid_t pid=0;
235 	const char *outfile = NULL;
236 	struct option *popt;
237 	char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
238 	     *tmp1, *tmp2, *s1, *s2;
239 	int i;
240 
241 	/*
242 	 * Process diff flags.
243 	 */
244 	/*
245 	 * Allocate memory for diff arguments and NULL.
246 	 * Each flag has at most one argument, so doubling argc gives an
247 	 * upper limit of how many diff args can be passed.  argv[0],
248 	 * file1, and file2 won't have arguments so doubling them will
249 	 * waste some memory; however we need an extra space for the
250 	 * NULL at the end, so it sort of works out.
251 	 */
252 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
253 		err(2, "main");
254 
255 	/* Add first argument, the program name. */
256 	diffargv[diffargc++] = diffprog;
257 
258 	/* create a dynamic string for merging single-switch options */
259 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
260 		err(2, "main");
261 
262 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
263 	    longopts, NULL)) != -1) {
264 		const char *errstr;
265 
266 		switch (ch) {
267 		/* only compatible --long-name-form with diff */
268 		case FCASE_IGNORE_OPT:
269 		case FCASE_SENSITIVE_OPT:
270 		case STRIPCR_OPT:
271 		case TSIZE_OPT:
272 		case 'S':
273 		break;
274 		/* combine no-arg single switches */
275 		case 'a':
276 		case 'B':
277 		case 'b':
278 		case 'd':
279 		case 'E':
280 		case 'i':
281 		case 't':
282 		case 'H':
283 		case 'W':
284 			for(popt = longopts; ch != popt->val && popt->name != NULL; popt++);
285 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
286 			/*
287 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
288 			 */
289 			if (ch == 'W')
290 				sprintf(diffargv[1], "%sw", diffargv[1]);
291 			else
292 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
293 			break;
294 		case DIFFPROG_OPT:
295 			diffargv[0] = diffprog = optarg;
296 			break;
297 		case 'I':
298 			Iflag = 1;
299 			diffargv[diffargc++] = "-I";
300 			diffargv[diffargc++] = optarg;
301 			break;
302 		case 'l':
303 			lflag = 1;
304 			break;
305 		case 'o':
306 			outfile = optarg;
307 			break;
308 		case 's':
309 			sflag = 1;
310 			break;
311 		case 'w':
312 			wflag = strtonum(optarg, WIDTH_MIN,
313 			    INT_MAX, &errstr);
314 			if (errstr)
315 				errx(2, "width is %s: %s", errstr, optarg);
316 			break;
317 		case HELP_OPT:
318 			for (i = 0; help_msg[i] != NULL; i++)
319 				printf("%s\n", help_msg[i]);
320 			exit(0);
321 			break;
322 		default:
323 			usage();
324 			break;
325 		}
326 	}
327 
328 	/* no single switches were used */
329 	if (strcmp(diffargv[1], "-") == 0 ) {
330 		for ( i = 1; i < argc-1; i++) {
331 			diffargv[i] = diffargv[i+1];
332 		}
333 		diffargv[diffargc-1] = NULL;
334 		diffargc--;
335 	}
336 
337 	argc -= optind;
338 	argv += optind;
339 
340 	if (argc != 2)
341 		usage();
342 
343 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
344 		err(2, "could not open: %s", optarg);
345 
346 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
347 		tmpdir = _PATH_TMP;
348 
349 	filename1 = argv[0];
350 	filename2 = argv[1];
351 
352 	/*
353 	 * Create temporary files for diff and sdiff to share if file1
354 	 * or file2 are not regular files.  This allows sdiff and diff
355 	 * to read the same inputs if one or both inputs are stdin.
356 	 *
357 	 * If any temporary files were created, their names would be
358 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
359 	 */
360 	tmp1 = tmp2 = NULL;
361 	/* file1 and file2 are the same, so copy to same temp file. */
362 	if (strcmp(filename1, filename2) == 0) {
363 		if ((tmp1 = mktmpcpy(filename1)))
364 			filename1 = filename2 = tmp1;
365 	/* Copy file1 and file2 into separate temp files. */
366 	} else {
367 		if ((tmp1 = mktmpcpy(filename1)))
368 			filename1 = tmp1;
369 		if ((tmp2 = mktmpcpy(filename2)))
370 			filename2 = tmp2;
371 	}
372 
373 	diffargv[diffargc++] = filename1;
374 	diffargv[diffargc++] = filename2;
375 	/* Add NULL to end of array to indicate end of array. */
376 	diffargv[diffargc++] = NULL;
377 
378 	/* Subtract column divider and divide by two. */
379 	width = (wflag - 3) / 2;
380 	/* Make sure line_width can fit in size_t. */
381 	if (width > (SIZE_MAX - 3) / 2)
382 		errx(2, "width is too large: %zu", width);
383 	line_width = width * 2 + 3;
384 
385 	if (pipe(fd))
386 		err(2, "pipe");
387 
388 	switch (pid = fork()) {
389 	case 0:
390 		/* child */
391 		/* We don't read from the pipe. */
392 		close(fd[0]);
393 		if (dup2(fd[1], STDOUT_FILENO) == -1)
394 			err(2, "child could not duplicate descriptor");
395 		/* Free unused descriptor. */
396 		close(fd[1]);
397 		execvp(diffprog, diffargv);
398 		err(2, "could not execute diff: %s", diffprog);
399 		break;
400 	case -1:
401 		err(2, "could not fork");
402 		break;
403 	}
404 
405 	/* parent */
406 	/* We don't write to the pipe. */
407 	close(fd[1]);
408 
409 	/* Open pipe to diff command. */
410 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
411 		err(2, "could not open diff pipe");
412 
413 	if ((file1 = fopen(filename1, "r")) == NULL)
414 		err(2, "could not open %s", filename1);
415 	if ((file2 = fopen(filename2, "r")) == NULL)
416 		err(2, "could not open %s", filename2);
417 	if (!istextfile(file1) || !istextfile(file2)) {
418 		/* Close open files and pipe, delete temps */
419 		fclose(file1);
420 		fclose(file2);
421 		if (diffpipe != NULL)
422 			fclose(diffpipe);
423 		if (tmp1)
424 			if (unlink(tmp1))
425 				warn("Error deleting %s.", tmp1);
426 		if (tmp2)
427 			if (unlink(tmp2))
428 				warn("Error deleting %s.", tmp2);
429 		free(tmp1);
430 		free(tmp2);
431 		binexec(diffprog, filename1, filename2);
432 	}
433 	/* Line numbers start at one. */
434 	file1ln = file2ln = 1;
435 
436 	/* Read and parse diff output. */
437 	while (parsecmd(diffpipe, file1, file2) != EOF)
438 		;
439 	fclose(diffpipe);
440 
441 	/* Wait for diff to exit. */
442 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
443 	    WEXITSTATUS(status) >= 2)
444 		err(2, "diff exited abnormally.");
445 
446 	/* Delete and free unneeded temporary files. */
447 	if (tmp1)
448 		if (unlink(tmp1))
449 			warn("Error deleting %s.", tmp1);
450 	if (tmp2)
451 		if (unlink(tmp2))
452 			warn("Error deleting %s.", tmp2);
453 	free(tmp1);
454 	free(tmp2);
455 	filename1 = filename2 = tmp1 = tmp2 = NULL;
456 
457 	/* No more diffs, so print common lines. */
458 	if (lflag)
459 		while ((s1 = xfgets(file1)))
460 			enqueue(s1, ' ', NULL);
461 	else
462 		for (;;) {
463 			s1 = xfgets(file1);
464 			s2 = xfgets(file2);
465 			if (s1 || s2)
466 				enqueue(s1, ' ', s2);
467 			else
468 				break;
469 		}
470 	fclose(file1);
471 	fclose(file2);
472 	/* Process unmodified lines. */
473 	processq();
474 
475 	/* Return diff exit status. */
476 	return (WEXITSTATUS(status));
477 }
478 
/*
 * When sdiff/zsdiff detects a binary file as input, executes them with
 * diff/zdiff to maintain the same behavior as GNU sdiff with binary input.
 *
 * Replaces the current process with "diffprog f1 f2" and never returns.
 * The program is located with execvp(), the same lookup used for the
 * regular diff invocation, so a --diff-program given without a path is
 * found here too.  If the exec fails the reason is reported and the
 * process exits with status 2, the status used for every other failure
 * in this file.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = { diffprog, f1, f2, NULL };

	execvp(diffprog, args);

	/* execvp() only returns when it failed; errno says why. */
	err(2, "could not execute diff process");
}
493 
494 /*
495  * Checks whether a file appears to be a text file.
496  */
497 static int
498 istextfile(FILE *f)
499 {
500 	int	ch, i;
501 
502 	if (f == NULL)
503 		return (1);
504 	rewind(f);
505 	for (i = 0; i <= MAX_CHECK; i++) {
506 		ch = fgetc(f);
507 		if (ch == '\0') {
508 			rewind(f);
509 			return (0);
510 		}
511 		if (ch == EOF)
512 			break;
513 	}
514 	rewind(f);
515 	return (1);
516 }
517 
/*
 * Write one column of text to stdout.
 *
 * s is the text, *col the screen column the cursor is currently in and
 * col_max the first column that must not be written to.  Ordinary
 * characters advance *col by one; a tab advances it to the next
 * multiple of eight.  Output stops at the end of s, when *col reaches
 * col_max, or in front of a tab that would carry the cursor past
 * col_max.  *col is left at the column reached.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;
	size_t tabstop;

	for (cp = s; *cp != '\0' && *col < col_max; cp++) {
		if (*cp == '\t') {
			/* Rounding up would overflow: give up. */
			if (*col > SIZE_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit: leave it out and stop. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			(*col)++;
		}
		putchar(*cp);
	}
}
557 
558 /*
559  * Prompts user to either choose between two strings or edit one, both,
560  * or neither.
561  */
562 static void
563 prompt(const char *s1, const char *s2)
564 {
565 	char *cmd;
566 
567 	/* Print command prompt. */
568 	putchar('%');
569 
570 	/* Get user input. */
571 	for (; (cmd = xfgets(stdin)); free(cmd)) {
572 		const char *p;
573 
574 		/* Skip leading whitespace. */
575 		for (p = cmd; isspace(*p); ++p)
576 			;
577 		switch (*p) {
578 		case 'e':
579 			/* Skip `e'. */
580 			++p;
581 			if (eparse(p, s1, s2) == -1)
582 				goto USAGE;
583 			break;
584 		case 'l':
585 		case '1':
586 			/* Choose left column as-is. */
587 			if (s1 != NULL)
588 				fprintf(outfp, "%s\n", s1);
589 			/* End of command parsing. */
590 			break;
591 		case 'q':
592 			goto QUIT;
593 		case 'r':
594 		case '2':
595 			/* Choose right column as-is. */
596 			if (s2 != NULL)
597 				fprintf(outfp, "%s\n", s2);
598 			/* End of command parsing. */
599 			break;
600 		case 's':
601 			sflag = 1;
602 			goto PROMPT;
603 		case 'v':
604 			sflag = 0;
605 			/* FALLTHROUGH */
606 		default:
607 			/* Interactive usage help. */
608 USAGE:
609 			int_usage();
610 PROMPT:
611 			putchar('%');
612 
613 			/* Prompt user again. */
614 			continue;
615 		}
616 		free(cmd);
617 		return;
618 	}
619 
620 	/*
621 	 * If there was no error, we received an EOF from stdin, so we
622 	 * should quit.
623 	 */
624 QUIT:
625 	fclose(outfp);
626 	exit(0);
627 }
628 
629 /*
630  * Takes two strings, separated by a column divider.  NULL strings are
631  * treated as empty columns.  If the divider is the ` ' character, the
632  * second column is not printed (-l flag).  In this case, the second
633  * string must be NULL.  When the second column is NULL, the divider
634  * does not print the trailing space following the divider character.
635  *
636  * Takes into account that tabs can take multiple columns.
637  */
638 static void
639 println(const char *s1, const char div, const char *s2)
640 {
641 	size_t col;
642 
643 	/* Print first column.  Skips if s1 == NULL. */
644 	col = 0;
645 	if (s1) {
646 		/* Skip angle bracket and space. */
647 		printcol(s1, &col, width);
648 
649 	}
650 
651 	/* Otherwise, we pad this column up to width. */
652 	for (; col < width; ++col)
653 		putchar(' ');
654 
655 	/* Only print left column. */
656 	if (div == ' ' && !s2) {
657 		printf(" (\n");
658 		return;
659 	}
660 
661 	/*
662 	 * Print column divider.  If there is no second column, we don't
663 	 * need to add the space for padding.
664 	 */
665 	if (!s2) {
666 		printf(" %c\n", div);
667 		return;
668 	}
669 	printf(" %c ", div);
670 	col += 3;
671 
672 	/* Skip angle bracket and space. */
673 	printcol(s2, &col, line_width);
674 
675 	putchar('\n');
676 }
677 
/*
 * Reads a line from file and returns as a string.  If EOF is reached,
 * NULL is returned.  The returned string must be freed afterwards.
 *
 * A trailing newline, if present, is removed.  The stream's error and
 * end-of-file indicators are cleared before reading.  A read error is
 * fatal and exits with status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may have allocated a buffer even though it failed. */
		free(s);
		return (NULL);
	}

	/* getline() returned at least one character here, so l >= 1. */
	if (s[l-1] == '\n')
		s[l-1] = '\0';

	return (s);
}
704 
705 /*
706  * Parse ed commands from diffpipe and print lines from file1 (lines
707  * to change or delete) or file2 (lines to add or change).
708  * Returns EOF or 0.
709  */
710 static int
711 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
712 {
713 	size_t file1start, file1end, file2start, file2end, n;
714 	/* ed command line and pointer to characters in line */
715 	char *line, *p, *q;
716 	const char *errstr;
717 	char c, cmd;
718 
719 	/* Read ed command. */
720 	if (!(line = xfgets(diffpipe)))
721 		return (EOF);
722 
723 	p = line;
724 	/* Go to character after line number. */
725 	while (isdigit(*p))
726 		++p;
727 	c = *p;
728 	*p++ = 0;
729 	file1start = strtonum(line, 0, INT_MAX, &errstr);
730 	if (errstr)
731 		errx(2, "file1 start is %s: %s", errstr, line);
732 
733 	/* A range is specified for file1. */
734 	if (c == ',') {
735 		q = p;
736 		/* Go to character after file2end. */
737 		while (isdigit(*p))
738 			++p;
739 		c = *p;
740 		*p++ = 0;
741 		file1end = strtonum(q, 0, INT_MAX, &errstr);
742 		if (errstr)
743 			errx(2, "file1 end is %s: %s", errstr, line);
744 		if (file1start > file1end)
745 			errx(2, "invalid line range in file1: %s", line);
746 	} else
747 		file1end = file1start;
748 
749 	cmd = c;
750 	/* Check that cmd is valid. */
751 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
752 		errx(2, "ed command not recognized: %c: %s", cmd, line);
753 
754 	q = p;
755 	/* Go to character after line number. */
756 	while (isdigit(*p))
757 		++p;
758 	c = *p;
759 	*p++ = 0;
760 	file2start = strtonum(q, 0, INT_MAX, &errstr);
761 	if (errstr)
762 		errx(2, "file2 start is %s: %s", errstr, line);
763 
764 	/*
765 	 * There should either be a comma signifying a second line
766 	 * number or the line should just end here.
767 	 */
768 	if (c != ',' && c != '\0')
769 		errx(2, "invalid line range in file2: %c: %s", c, line);
770 
771 	if (c == ',') {
772 
773 		file2end = strtonum(p, 0, INT_MAX, &errstr);
774 		if (errstr)
775 			errx(2, "file2 end is %s: %s", errstr, line);
776 		if (file2start >= file2end)
777 			errx(2, "invalid line range in file2: %s", line);
778 	} else
779 		file2end = file2start;
780 
781 	/* Appends happen _after_ stated line. */
782 	if (cmd == 'a') {
783 		if (file1start != file1end)
784 			errx(2, "append cannot have a file1 range: %s",
785 			    line);
786 		if (file1start == SIZE_MAX)
787 			errx(2, "file1 line range too high: %s", line);
788 		file1start = ++file1end;
789 	}
790 	/*
791 	 * I'm not sure what the deal is with the line numbers for
792 	 * deletes, though.
793 	 */
794 	else if (cmd == 'd') {
795 		if (file2start != file2end)
796 			errx(2, "delete cannot have a file2 range: %s",
797 			    line);
798 		if (file2start == SIZE_MAX)
799 			errx(2, "file2 line range too high: %s", line);
800 		file2start = ++file2end;
801 	}
802 
803 	/*
804 	 * Continue reading file1 and file2 until we reach line numbers
805 	 * specified by diff.  Should only happen with -I flag.
806 	 */
807 	for (; file1ln < file1start && file2ln < file2start;
808 	    ++file1ln, ++file2ln) {
809 		char *s1, *s2;
810 
811 		if (!(s1 = xfgets(file1)))
812 			errx(2, "file1 shorter than expected");
813 		if (!(s2 = xfgets(file2)))
814 			errx(2, "file2 shorter than expected");
815 
816 		/* If the -l flag was specified, print only left column. */
817 		if (lflag) {
818 			free(s2);
819 			/*
820 			 * XXX - If -l and -I are both specified, all
821 			 * unchanged or ignored lines are shown with a
822 			 * `(' divider.  This matches GNU sdiff, but I
823 			 * believe it is a bug.  Just check out:
824 			 * gsdiff -l -I '^$' samefile samefile.
825 			 */
826 			if (Iflag)
827 				enqueue(s1, '(', NULL);
828 			else
829 				enqueue(s1, ' ', NULL);
830 		} else
831 			enqueue(s1, ' ', s2);
832 	}
833 	/* Ignore deleted lines. */
834 	for (; file1ln < file1start; ++file1ln) {
835 		char *s;
836 
837 		if (!(s = xfgets(file1)))
838 			errx(2, "file1 shorter than expected");
839 
840 		enqueue(s, '(', NULL);
841 	}
842 	/* Ignore added lines. */
843 	for (; file2ln < file2start; ++file2ln) {
844 		char *s;
845 
846 		if (!(s = xfgets(file2)))
847 			errx(2, "file2 shorter than expected");
848 
849 		/* If -l flag was given, don't print right column. */
850 		if (lflag)
851 			free(s);
852 		else
853 			enqueue(NULL, ')', s);
854 	}
855 
856 	/* Process unmodified or skipped lines. */
857 	processq();
858 
859 	switch (cmd) {
860 	case 'a':
861 		printa(file2, file2end);
862 		n = file2end - file2start + 1;
863 		break;
864 	case 'c':
865 		printc(file1, file1end, file2, file2end);
866 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
867 		break;
868 	case 'd':
869 		printd(file1, file1end);
870 		n = file1end - file1start + 1;
871 		break;
872 	default:
873 		errx(2, "invalid diff command: %c: %s", cmd, line);
874 	}
875 	free(line);
876 
877 	/* Skip to next ed line. */
878 	while (n--) {
879 		if (!(line = xfgets(diffpipe)))
880 			errx(2, "diff ended early");
881 		free(line);
882 	}
883 
884 	return (0);
885 }
886 
887 /*
888  * Queues up a diff line.
889  */
890 static void
891 enqueue(char *left, char div, char *right)
892 {
893 	struct diffline *diffp;
894 
895 	if (!(diffp = malloc(sizeof(struct diffline))))
896 		err(2, "enqueue");
897 	diffp->left = left;
898 	diffp->div = div;
899 	diffp->right = right;
900 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
901 }
902 
903 /*
904  * Free a diffline structure and its elements.
905  */
906 static void
907 freediff(struct diffline *diffp)
908 {
909 
910 	free(diffp->left);
911 	free(diffp->right);
912 	free(diffp);
913 }
914 
/*
 * Append a newline followed by `append' to the malloc'ed string *s,
 * growing it as needed.  If *s is NULL it simply becomes a copy of
 * `append', without a leading newline.
 *
 * The address and length of the string handled by the previous call are
 * remembered, so repeated appends to the same string do not have to
 * rescan it.  Allocation failure is fatal (exit status 2).
 */
static void
astrcat(char **s, const char *append)
{
	/* String seen in the previous call and its length. */
	static const char *oldstr = NULL;
	static size_t offset = 0;
	size_t addlen, total;
	char *grown, *tail;

	/* Nothing to append to yet: start the string. */
	if (*s == NULL) {
		*s = strdup(append);
		if (*s == NULL)
			err(2, "astrcat");

		oldstr = *s;
		offset = strlen(*s);
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (*s != oldstr) {
		oldstr = *s;
		offset = strlen(*s);
	}

	/* Old text + '\n' + appended text + '\0'. */
	addlen = strlen(append);
	total = offset + 1 + addlen + 1;

	grown = realloc(*s, total);
	if (grown == NULL)
		err(2, "astrcat");
	*s = grown;

	/* Write the separator and the new text over the old terminator. */
	tail = grown + offset;
	*tail++ = '\n';
	memcpy(tail, append, addlen + 1);

	/* Remember the result for the next call. */
	offset = total - 1;
	oldstr = grown;
}
979 
980 /*
981  * Process diff set queue, printing, prompting, and saving each diff
982  * line stored in queue.
983  */
984 static void
985 processq(void)
986 {
987 	struct diffline *diffp;
988 	char divc, *left, *right;
989 
990 	/* Don't process empty queue. */
991 	if (STAILQ_EMPTY(&diffhead))
992 		return;
993 
994 	/* Remember the divider. */
995 	divc = STAILQ_FIRST(&diffhead)->div;
996 
997 	left = NULL;
998 	right = NULL;
999 	/*
1000 	 * Go through set of diffs, concatenating each line in left or
1001 	 * right column into two long strings, `left' and `right'.
1002 	 */
1003 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1004 		/*
1005 		 * Print changed lines if -s was given,
1006 		 * print all lines if -s was not given.
1007 		 */
1008 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1009 		    diffp->div == '>')
1010 			println(diffp->left, diffp->div, diffp->right);
1011 
1012 		/* Append new lines to diff set. */
1013 		if (diffp->left)
1014 			astrcat(&left, diffp->left);
1015 		if (diffp->right)
1016 			astrcat(&right, diffp->right);
1017 	}
1018 
1019 	/* Empty queue and free each diff line and its elements. */
1020 	while (!STAILQ_EMPTY(&diffhead)) {
1021 		diffp = STAILQ_FIRST(&diffhead);
1022 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1023 		freediff(diffp);
1024 	}
1025 
1026 	/* Write to outfp, prompting user if lines are different. */
1027 	if (outfp)
1028 		switch (divc) {
1029 		case ' ': case '(': case ')':
1030 			fprintf(outfp, "%s\n", left);
1031 			break;
1032 		case '|': case '<': case '>':
1033 			prompt(left, right);
1034 			break;
1035 		default:
1036 			errx(2, "invalid divider: %c", divc);
1037 		}
1038 
1039 	/* Free left and right. */
1040 	free(left);
1041 	free(right);
1042 }
1043 
1044 /*
1045  * Print lines following an (a)ppend command.
1046  */
1047 static void
1048 printa(FILE *file, size_t line2)
1049 {
1050 	char *line;
1051 
1052 	for (; file2ln <= line2; ++file2ln) {
1053 		if (!(line = xfgets(file)))
1054 			errx(2, "append ended early");
1055 		enqueue(NULL, '>', line);
1056 	}
1057 	processq();
1058 }
1059 
1060 /*
1061  * Print lines following a (c)hange command, from file1ln to file1end
1062  * and from file2ln to file2end.
1063  */
1064 static void
1065 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1066 {
1067 	struct fileline {
1068 		STAILQ_ENTRY(fileline)	 fileentries;
1069 		char			*line;
1070 	};
1071 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1072 
1073 	/* Read lines to be deleted. */
1074 	for (; file1ln <= file1end; ++file1ln) {
1075 		struct fileline *linep;
1076 		char *line1;
1077 
1078 		/* Read lines from both. */
1079 		if (!(line1 = xfgets(file1)))
1080 			errx(2, "error reading file1 in delete in change");
1081 
1082 		/* Add to delete queue. */
1083 		if (!(linep = malloc(sizeof(struct fileline))))
1084 			err(2, "printc");
1085 		linep->line = line1;
1086 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1087 	}
1088 
1089 	/* Process changed lines.. */
1090 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1091 	    ++file2ln) {
1092 		struct fileline *del;
1093 		char *add;
1094 
1095 		/* Get add line. */
1096 		if (!(add = xfgets(file2)))
1097 			errx(2, "error reading add in change");
1098 
1099 		del = STAILQ_FIRST(&delqhead);
1100 		enqueue(del->line, '|', add);
1101 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1102 		/*
1103 		 * Free fileline structure but not its elements since
1104 		 * they are queued up.
1105 		 */
1106 		free(del);
1107 	}
1108 	processq();
1109 
1110 	/* Process remaining lines to add. */
1111 	for (; file2ln <= file2end; ++file2ln) {
1112 		char *add;
1113 
1114 		/* Get add line. */
1115 		if (!(add = xfgets(file2)))
1116 			errx(2, "error reading add in change");
1117 
1118 		enqueue(NULL, '>', add);
1119 	}
1120 	processq();
1121 
1122 	/* Process remaining lines to delete. */
1123 	while (!STAILQ_EMPTY(&delqhead)) {
1124 		struct fileline *filep;
1125 
1126 		filep = STAILQ_FIRST(&delqhead);
1127 		enqueue(filep->line, '<', NULL);
1128 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1129 		free(filep);
1130 	}
1131 	processq();
1132 }
1133 
1134 /*
1135  * Print deleted lines from file, from file1ln to file1end.
1136  */
1137 static void
1138 printd(FILE *file1, size_t file1end)
1139 {
1140 	char *line1;
1141 
1142 	/* Print out lines file1ln to line2. */
1143 	for (; file1ln <= file1end; ++file1ln) {
1144 		if (!(line1 = xfgets(file1)))
1145 			errx(2, "file1 ended early in delete");
1146 		enqueue(line1, '<', NULL);
1147 	}
1148 	processq();
1149 }
1150 
/*
 * Print the list of interactive merge commands on stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1168 
/*
 * Print the command synopsis on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1178