xref: /freebsd/usr.bin/sdiff/sdiff.c (revision 3c4ba5f55438f7afd4f4b0b56f88f2bb505fd6a6)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 __FBSDID("$FreeBSD$");
10 
11 #include <sys/param.h>
12 #include <sys/queue.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 
17 #include <ctype.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <getopt.h>
22 #include <limits.h>
23 #include <paths.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 
30 #include "extern.h"
31 
/* Default diff program; main() replaces it when --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width used when -w is not given. */
#define WIDTH 126

/*
 * Smallest accepted -w value: each of the two columns needs at least
 * one character, and the gutter between them takes three more
 * (space, divider character, space).
 */
#define WIDTH_MIN 5

/*
 * Upper bound used by istextfile() on how many leading bytes of an
 * input are examined for a NUL byte.
 */
#define MAX_CHECK 768
43 
/*
 * One row of side-by-side output, kept on the diffhead queue until
 * processq() prints it.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;		/* left-column text, or NULL */
	char	 div;		/* divider character between the columns */
	char	*right;		/* right-column text, or NULL */
};
51 
52 static void astrcat(char **, const char *);
53 static void enqueue(char *, char, char *);
54 static char *mktmpcpy(const char *);
55 static int istextfile(FILE *);
56 static void binexec(char *, char *, char *) __dead2;
57 static void freediff(struct diffline *);
58 static void int_usage(void);
59 static int parsecmd(FILE *, FILE *, FILE *);
60 static void printa(FILE *, size_t);
61 static void printc(FILE *, size_t, FILE *, size_t);
62 static void printcol(const char *, size_t *, const size_t);
63 static void printd(FILE *, size_t);
64 static void println(const char *, const char, const char *);
65 static void processq(void);
66 static void prompt(const char *, const char *);
67 static void usage(void) __dead2;
68 static char *xfgets(FILE *);
69 
/* Lines queued by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);

/* Output geometry, set in main() from the -w value. */
static size_t	line_width;	/* width of a line (two columns and divider) */
static size_t	width;		/* width of each column */

/* Line number of file1 and file2. */
static size_t	file1ln;
static size_t	file2ln;

/* Option flags. */
static int	Iflag;		/* ignore sets matching regexp */
static int	lflag;		/* print only left column for identical lines */
static int	sflag;		/* skip identical lines */

/* Not static: these two have external linkage. */
FILE		*outfp;		/* file to save changes to */
const char	*tmpdir;	/* TMPDIR or /tmp */
79 
/*
 * Return values for long options that have no single-letter form.
 * They start above CHAR_MAX so they cannot collide with an option
 * character.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};

/* Long-option table handed to getopt_long() in main(). */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines", no_argument, NULL, 's' },
	{ "width", required_argument, NULL, 'w' },

	{ "output", required_argument, NULL, 'o' },
	{ "diff-program", required_argument, NULL, DIFFPROG_OPT },

	/* Options processed by diff. */
	{ "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT },
	{ "tabsize", required_argument, NULL, TSIZE_OPT },
	{ "help", no_argument, NULL, HELP_OPT },
	{ "text", no_argument, NULL, 'a' },
	{ "ignore-blank-lines", no_argument, NULL, 'B' },
	{ "ignore-space-change", no_argument, NULL, 'b' },
	{ "minimal", no_argument, NULL, 'd' },
	{ "ignore-tab-expansion", no_argument, NULL, 'E' },
	{ "ignore-matching-lines", required_argument, NULL, 'I' },
	{ "ignore-case", no_argument, NULL, 'i' },
	{ "left-column", no_argument, NULL, 'l' },
	{ "expand-tabs", no_argument, NULL, 't' },
	{ "speed-large-files", no_argument, NULL, 'H' },
	{ "ignore-all-space", no_argument, NULL, 'W' },

	/* Terminating entry. */
	{ NULL, 0, NULL, '\0' }
};
118 
/*
 * Text printed line by line for --help.  The list is NULL-terminated.
 * Option names here are kept in step with the longopts table and with
 * the synopsis printed by usage().
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t-H, --speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
142 
143 /*
144  * Create temporary file if source_file is not a regular file.
145  * Returns temporary file name if one was malloced, NULL if unnecessary.
146  */
147 static char *
148 mktmpcpy(const char *source_file)
149 {
150 	struct stat sb;
151 	ssize_t rcount;
152 	int ifd, ofd;
153 	u_char buf[BUFSIZ];
154 	char *target_file;
155 
156 	/* Open input and output. */
157 	ifd = open(source_file, O_RDONLY, 0);
158 	/* File was opened successfully. */
159 	if (ifd != -1) {
160 		if (fstat(ifd, &sb) == -1)
161 			err(2, "error getting file status from %s", source_file);
162 
163 		/* Regular file. */
164 		if (S_ISREG(sb.st_mode)) {
165 			close(ifd);
166 			return (NULL);
167 		}
168 	} else {
169 		/* If ``-'' does not exist the user meant stdin. */
170 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
171 			ifd = STDIN_FILENO;
172 		else
173 			err(2, "error opening %s", source_file);
174 	}
175 
176 	/* Not a regular file, so copy input into temporary file. */
177 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
178 		err(2, "asprintf");
179 	if ((ofd = mkstemp(target_file)) == -1) {
180 		warn("error opening %s", target_file);
181 		goto FAIL;
182 	}
183 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
184 	    rcount != 0) {
185 		ssize_t wcount;
186 
187 		wcount = write(ofd, buf, (size_t)rcount);
188 		if (-1 == wcount || rcount != wcount) {
189 			warn("error writing to %s", target_file);
190 			goto FAIL;
191 		}
192 	}
193 	if (rcount == -1) {
194 		warn("error reading from %s", source_file);
195 		goto FAIL;
196 	}
197 
198 	close(ifd);
199 	close(ofd);
200 
201 	return (target_file);
202 
203 FAIL:
204 	unlink(target_file);
205 	exit(2);
206 }
207 
208 int
209 main(int argc, char **argv)
210 {
211 	FILE *diffpipe=NULL, *file1, *file2;
212 	size_t diffargc = 0, wflag = WIDTH;
213 	int ch, fd[2] = {-1}, status;
214 	pid_t pid=0;
215 	const char *outfile = NULL;
216 	char **diffargv, *diffprog = diff_path, *filename1, *filename2,
217 	     *tmp1, *tmp2, *s1, *s2;
218 	int i;
219 	char I_arg[] = "-I";
220 	char speed_lf[] = "--speed-large-files";
221 
222 	/*
223 	 * Process diff flags.
224 	 */
225 	/*
226 	 * Allocate memory for diff arguments and NULL.
227 	 * Each flag has at most one argument, so doubling argc gives an
228 	 * upper limit of how many diff args can be passed.  argv[0],
229 	 * file1, and file2 won't have arguments so doubling them will
230 	 * waste some memory; however we need an extra space for the
231 	 * NULL at the end, so it sort of works out.
232 	 */
233 	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
234 		err(2, "main");
235 
236 	/* Add first argument, the program name. */
237 	diffargv[diffargc++] = diffprog;
238 
239 	/* create a dynamic string for merging single-switch options */
240 	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
241 		err(2, "main");
242 
243 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
244 	    longopts, NULL)) != -1) {
245 		const char *errstr;
246 
247 		switch (ch) {
248 		/* only compatible --long-name-form with diff */
249 		case FCASE_IGNORE_OPT:
250 		case FCASE_SENSITIVE_OPT:
251 		case STRIPCR_OPT:
252 		case TSIZE_OPT:
253 		case 'S':
254 		break;
255 		/* combine no-arg single switches */
256 		case 'a':
257 		case 'B':
258 		case 'b':
259 		case 'd':
260 		case 'E':
261 		case 'i':
262 		case 't':
263 		case 'W':
264 			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
265 			/*
266 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
267 			 */
268 			if (ch == 'W')
269 				sprintf(diffargv[1], "%sw", diffargv[1]);
270 			else
271 				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
272 			break;
273 		case 'H':
274 			diffargv[diffargc++] = speed_lf;
275 			break;
276 		case DIFFPROG_OPT:
277 			diffargv[0] = diffprog = optarg;
278 			break;
279 		case 'I':
280 			Iflag = 1;
281 			diffargv[diffargc++] = I_arg;
282 			diffargv[diffargc++] = optarg;
283 			break;
284 		case 'l':
285 			lflag = 1;
286 			break;
287 		case 'o':
288 			outfile = optarg;
289 			break;
290 		case 's':
291 			sflag = 1;
292 			break;
293 		case 'w':
294 			wflag = strtonum(optarg, WIDTH_MIN,
295 			    INT_MAX, &errstr);
296 			if (errstr)
297 				errx(2, "width is %s: %s", errstr, optarg);
298 			break;
299 		case HELP_OPT:
300 			for (i = 0; help_msg[i] != NULL; i++)
301 				printf("%s\n", help_msg[i]);
302 			exit(0);
303 			break;
304 		default:
305 			usage();
306 			break;
307 		}
308 	}
309 
310 	/* no single switches were used */
311 	if (strcmp(diffargv[1], "-") == 0 ) {
312 		for ( i = 1; i < argc-1; i++) {
313 			diffargv[i] = diffargv[i+1];
314 		}
315 		diffargv[diffargc-1] = NULL;
316 		diffargc--;
317 	}
318 
319 	argc -= optind;
320 	argv += optind;
321 
322 	if (argc != 2)
323 		usage();
324 
325 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
326 		err(2, "could not open: %s", optarg);
327 
328 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
329 		tmpdir = _PATH_TMP;
330 
331 	filename1 = argv[0];
332 	filename2 = argv[1];
333 
334 	/*
335 	 * Create temporary files for diff and sdiff to share if file1
336 	 * or file2 are not regular files.  This allows sdiff and diff
337 	 * to read the same inputs if one or both inputs are stdin.
338 	 *
339 	 * If any temporary files were created, their names would be
340 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
341 	 */
342 	tmp1 = tmp2 = NULL;
343 	/* file1 and file2 are the same, so copy to same temp file. */
344 	if (strcmp(filename1, filename2) == 0) {
345 		if ((tmp1 = mktmpcpy(filename1)))
346 			filename1 = filename2 = tmp1;
347 	/* Copy file1 and file2 into separate temp files. */
348 	} else {
349 		if ((tmp1 = mktmpcpy(filename1)))
350 			filename1 = tmp1;
351 		if ((tmp2 = mktmpcpy(filename2)))
352 			filename2 = tmp2;
353 	}
354 
355 	diffargv[diffargc++] = filename1;
356 	diffargv[diffargc++] = filename2;
357 	/* Add NULL to end of array to indicate end of array. */
358 	diffargv[diffargc++] = NULL;
359 
360 	/* Subtract column divider and divide by two. */
361 	width = (wflag - 3) / 2;
362 	/* Make sure line_width can fit in size_t. */
363 	if (width > (SIZE_MAX - 3) / 2)
364 		errx(2, "width is too large: %zu", width);
365 	line_width = width * 2 + 3;
366 
367 	if (pipe(fd))
368 		err(2, "pipe");
369 
370 	switch (pid = fork()) {
371 	case 0:
372 		/* child */
373 		/* We don't read from the pipe. */
374 		close(fd[0]);
375 		if (dup2(fd[1], STDOUT_FILENO) == -1)
376 			err(2, "child could not duplicate descriptor");
377 		/* Free unused descriptor. */
378 		close(fd[1]);
379 		execvp(diffprog, diffargv);
380 		err(2, "could not execute diff: %s", diffprog);
381 		break;
382 	case -1:
383 		err(2, "could not fork");
384 		break;
385 	}
386 
387 	/* parent */
388 	/* We don't write to the pipe. */
389 	close(fd[1]);
390 
391 	/* Open pipe to diff command. */
392 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
393 		err(2, "could not open diff pipe");
394 
395 	if ((file1 = fopen(filename1, "r")) == NULL)
396 		err(2, "could not open %s", filename1);
397 	if ((file2 = fopen(filename2, "r")) == NULL)
398 		err(2, "could not open %s", filename2);
399 	if (!istextfile(file1) || !istextfile(file2)) {
400 		/* Close open files and pipe, delete temps */
401 		fclose(file1);
402 		fclose(file2);
403 		if (diffpipe != NULL)
404 			fclose(diffpipe);
405 		if (tmp1)
406 			if (unlink(tmp1))
407 				warn("Error deleting %s.", tmp1);
408 		if (tmp2)
409 			if (unlink(tmp2))
410 				warn("Error deleting %s.", tmp2);
411 		free(tmp1);
412 		free(tmp2);
413 		binexec(diffprog, filename1, filename2);
414 	}
415 	/* Line numbers start at one. */
416 	file1ln = file2ln = 1;
417 
418 	/* Read and parse diff output. */
419 	while (parsecmd(diffpipe, file1, file2) != EOF)
420 		;
421 	fclose(diffpipe);
422 
423 	/* Wait for diff to exit. */
424 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
425 	    WEXITSTATUS(status) >= 2)
426 		err(2, "diff exited abnormally.");
427 
428 	/* Delete and free unneeded temporary files. */
429 	if (tmp1)
430 		if (unlink(tmp1))
431 			warn("Error deleting %s.", tmp1);
432 	if (tmp2)
433 		if (unlink(tmp2))
434 			warn("Error deleting %s.", tmp2);
435 	free(tmp1);
436 	free(tmp2);
437 	filename1 = filename2 = tmp1 = tmp2 = NULL;
438 
439 	/* No more diffs, so print common lines. */
440 	if (lflag)
441 		while ((s1 = xfgets(file1)))
442 			enqueue(s1, ' ', NULL);
443 	else
444 		for (;;) {
445 			s1 = xfgets(file1);
446 			s2 = xfgets(file2);
447 			if (s1 || s2)
448 				enqueue(s1, ' ', s2);
449 			else
450 				break;
451 		}
452 	fclose(file1);
453 	fclose(file2);
454 	/* Process unmodified lines. */
455 	processq();
456 
457 	/* Return diff exit status. */
458 	return (WEXITSTATUS(status));
459 }
460 
/*
 * When sdiff detects a binary file as input, hand both files to the
 * diff program directly, to maintain the same behavior as GNU sdiff
 * with binary input.
 *
 * The current process image is replaced, so this function does not
 * return.  The program is looked up with execvp(), the same call
 * main() uses for the text case, so a --diff-program name without a
 * slash is searched for in PATH here as well.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{

	char *args[] = {diffprog, f1, f2, (char *) 0};
	execvp(diffprog, args);

	/*
	 * Only reached when the exec failed.  Status 2 is what the rest
	 * of this file uses for failures, and err() reports why the
	 * exec failed.
	 */
	err(2, "could not execute diff: %s", diffprog);
}
475 
476 /*
477  * Checks whether a file appears to be a text file.
478  */
479 static int
480 istextfile(FILE *f)
481 {
482 	int	ch, i;
483 
484 	if (f == NULL)
485 		return (1);
486 	rewind(f);
487 	for (i = 0; i <= MAX_CHECK; i++) {
488 		ch = fgetc(f);
489 		if (ch == '\0') {
490 			rewind(f);
491 			return (0);
492 		}
493 		if (ch == EOF)
494 			break;
495 	}
496 	rewind(f);
497 	return (1);
498 }
499 
/*
 * Write one column (left or right) of a line to stdout.
 *
 * s is the text, *col the current screen column and col_max the column
 * at which output must stop.  *col is advanced as characters are
 * written: a tab moves it to the next multiple of eight, every other
 * character moves it by one.  Output stops at the end of s, once *col
 * reaches col_max, or at a tab that would land past col_max.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;
	size_t tabstop;

	for (cp = s; *cp != '\0'; cp++) {
		if (*col >= col_max)
			break;

		if (*cp == '\t') {
			/* Rounding up would overflow; give up. */
			if (*col > SIZE_MAX - 8)
				return;

			/* Next multiple of eight. */
			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit in this column. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
539 
540 /*
541  * Prompts user to either choose between two strings or edit one, both,
542  * or neither.
543  */
544 static void
545 prompt(const char *s1, const char *s2)
546 {
547 	char *cmd;
548 
549 	/* Print command prompt. */
550 	putchar('%');
551 
552 	/* Get user input. */
553 	for (; (cmd = xfgets(stdin)); free(cmd)) {
554 		const char *p;
555 
556 		/* Skip leading whitespace. */
557 		for (p = cmd; isspace(*p); ++p)
558 			;
559 		switch (*p) {
560 		case 'e':
561 			/* Skip `e'. */
562 			++p;
563 			if (eparse(p, s1, s2) == -1)
564 				goto USAGE;
565 			break;
566 		case 'l':
567 		case '1':
568 			/* Choose left column as-is. */
569 			if (s1 != NULL)
570 				fprintf(outfp, "%s\n", s1);
571 			/* End of command parsing. */
572 			break;
573 		case 'q':
574 			goto QUIT;
575 		case 'r':
576 		case '2':
577 			/* Choose right column as-is. */
578 			if (s2 != NULL)
579 				fprintf(outfp, "%s\n", s2);
580 			/* End of command parsing. */
581 			break;
582 		case 's':
583 			sflag = 1;
584 			goto PROMPT;
585 		case 'v':
586 			sflag = 0;
587 			/* FALLTHROUGH */
588 		default:
589 			/* Interactive usage help. */
590 USAGE:
591 			int_usage();
592 PROMPT:
593 			putchar('%');
594 
595 			/* Prompt user again. */
596 			continue;
597 		}
598 		free(cmd);
599 		return;
600 	}
601 
602 	/*
603 	 * If there was no error, we received an EOF from stdin, so we
604 	 * should quit.
605 	 */
606 QUIT:
607 	fclose(outfp);
608 	exit(0);
609 }
610 
611 /*
612  * Takes two strings, separated by a column divider.  NULL strings are
613  * treated as empty columns.  If the divider is the ` ' character, the
614  * second column is not printed (-l flag).  In this case, the second
615  * string must be NULL.  When the second column is NULL, the divider
616  * does not print the trailing space following the divider character.
617  *
618  * Takes into account that tabs can take multiple columns.
619  */
620 static void
621 println(const char *s1, const char divider, const char *s2)
622 {
623 	size_t col;
624 
625 	/* Print first column.  Skips if s1 == NULL. */
626 	col = 0;
627 	if (s1) {
628 		/* Skip angle bracket and space. */
629 		printcol(s1, &col, width);
630 
631 	}
632 
633 	/* Otherwise, we pad this column up to width. */
634 	for (; col < width; ++col)
635 		putchar(' ');
636 
637 	/* Only print left column. */
638 	if (divider == ' ' && !s2) {
639 		printf(" (\n");
640 		return;
641 	}
642 
643 	/*
644 	 * Print column divider.  If there is no second column, we don't
645 	 * need to add the space for padding.
646 	 */
647 	if (!s2) {
648 		printf(" %c\n", divider);
649 		return;
650 	}
651 	printf(" %c ", divider);
652 	col += 3;
653 
654 	/* Skip angle bracket and space. */
655 	printcol(s2, &col, line_width);
656 
657 	putchar('\n');
658 }
659 
/*
 * Reads a line from file and returns it as a newly allocated string
 * with the trailing newline, if any, removed.  The returned string
 * must be freed by the caller.
 *
 * Returns NULL at end of input.  A read error terminates the program
 * with status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may have allocated a buffer even on failure. */
		free(s);
		return (NULL);
	}

	if (l > 0 && s[l-1] == '\n')
		s[l-1] = '\0';

	return (s);
}
686 
687 /*
688  * Parse ed commands from diffpipe and print lines from file1 (lines
689  * to change or delete) or file2 (lines to add or change).
690  * Returns EOF or 0.
691  */
692 static int
693 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
694 {
695 	size_t file1start, file1end, file2start, file2end, n;
696 	/* ed command line and pointer to characters in line */
697 	char *line, *p, *q;
698 	const char *errstr;
699 	char c, cmd;
700 
701 	/* Read ed command. */
702 	if (!(line = xfgets(diffpipe)))
703 		return (EOF);
704 
705 	p = line;
706 	/* Go to character after line number. */
707 	while (isdigit(*p))
708 		++p;
709 	c = *p;
710 	*p++ = 0;
711 	file1start = strtonum(line, 0, INT_MAX, &errstr);
712 	if (errstr)
713 		errx(2, "file1 start is %s: %s", errstr, line);
714 
715 	/* A range is specified for file1. */
716 	if (c == ',') {
717 		q = p;
718 		/* Go to character after file2end. */
719 		while (isdigit(*p))
720 			++p;
721 		c = *p;
722 		*p++ = 0;
723 		file1end = strtonum(q, 0, INT_MAX, &errstr);
724 		if (errstr)
725 			errx(2, "file1 end is %s: %s", errstr, line);
726 		if (file1start > file1end)
727 			errx(2, "invalid line range in file1: %s", line);
728 	} else
729 		file1end = file1start;
730 
731 	cmd = c;
732 	/* Check that cmd is valid. */
733 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
734 		errx(2, "ed command not recognized: %c: %s", cmd, line);
735 
736 	q = p;
737 	/* Go to character after line number. */
738 	while (isdigit(*p))
739 		++p;
740 	c = *p;
741 	*p++ = 0;
742 	file2start = strtonum(q, 0, INT_MAX, &errstr);
743 	if (errstr)
744 		errx(2, "file2 start is %s: %s", errstr, line);
745 
746 	/*
747 	 * There should either be a comma signifying a second line
748 	 * number or the line should just end here.
749 	 */
750 	if (c != ',' && c != '\0')
751 		errx(2, "invalid line range in file2: %c: %s", c, line);
752 
753 	if (c == ',') {
754 
755 		file2end = strtonum(p, 0, INT_MAX, &errstr);
756 		if (errstr)
757 			errx(2, "file2 end is %s: %s", errstr, line);
758 		if (file2start >= file2end)
759 			errx(2, "invalid line range in file2: %s", line);
760 	} else
761 		file2end = file2start;
762 
763 	/* Appends happen _after_ stated line. */
764 	if (cmd == 'a') {
765 		if (file1start != file1end)
766 			errx(2, "append cannot have a file1 range: %s",
767 			    line);
768 		if (file1start == SIZE_MAX)
769 			errx(2, "file1 line range too high: %s", line);
770 		file1start = ++file1end;
771 	}
772 	/*
773 	 * I'm not sure what the deal is with the line numbers for
774 	 * deletes, though.
775 	 */
776 	else if (cmd == 'd') {
777 		if (file2start != file2end)
778 			errx(2, "delete cannot have a file2 range: %s",
779 			    line);
780 		if (file2start == SIZE_MAX)
781 			errx(2, "file2 line range too high: %s", line);
782 		file2start = ++file2end;
783 	}
784 
785 	/*
786 	 * Continue reading file1 and file2 until we reach line numbers
787 	 * specified by diff.  Should only happen with -I flag.
788 	 */
789 	for (; file1ln < file1start && file2ln < file2start;
790 	    ++file1ln, ++file2ln) {
791 		char *s1, *s2;
792 
793 		if (!(s1 = xfgets(file1)))
794 			errx(2, "file1 shorter than expected");
795 		if (!(s2 = xfgets(file2)))
796 			errx(2, "file2 shorter than expected");
797 
798 		/* If the -l flag was specified, print only left column. */
799 		if (lflag) {
800 			free(s2);
801 			/*
802 			 * XXX - If -l and -I are both specified, all
803 			 * unchanged or ignored lines are shown with a
804 			 * `(' divider.  This matches GNU sdiff, but I
805 			 * believe it is a bug.  Just check out:
806 			 * gsdiff -l -I '^$' samefile samefile.
807 			 */
808 			if (Iflag)
809 				enqueue(s1, '(', NULL);
810 			else
811 				enqueue(s1, ' ', NULL);
812 		} else
813 			enqueue(s1, ' ', s2);
814 	}
815 	/* Ignore deleted lines. */
816 	for (; file1ln < file1start; ++file1ln) {
817 		char *s;
818 
819 		if (!(s = xfgets(file1)))
820 			errx(2, "file1 shorter than expected");
821 
822 		enqueue(s, '(', NULL);
823 	}
824 	/* Ignore added lines. */
825 	for (; file2ln < file2start; ++file2ln) {
826 		char *s;
827 
828 		if (!(s = xfgets(file2)))
829 			errx(2, "file2 shorter than expected");
830 
831 		/* If -l flag was given, don't print right column. */
832 		if (lflag)
833 			free(s);
834 		else
835 			enqueue(NULL, ')', s);
836 	}
837 
838 	/* Process unmodified or skipped lines. */
839 	processq();
840 
841 	switch (cmd) {
842 	case 'a':
843 		printa(file2, file2end);
844 		n = file2end - file2start + 1;
845 		break;
846 	case 'c':
847 		printc(file1, file1end, file2, file2end);
848 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
849 		break;
850 	case 'd':
851 		printd(file1, file1end);
852 		n = file1end - file1start + 1;
853 		break;
854 	default:
855 		errx(2, "invalid diff command: %c: %s", cmd, line);
856 	}
857 	free(line);
858 
859 	/* Skip to next ed line. */
860 	while (n--) {
861 		if (!(line = xfgets(diffpipe)))
862 			errx(2, "diff ended early");
863 		free(line);
864 	}
865 
866 	return (0);
867 }
868 
869 /*
870  * Queues up a diff line.
871  */
872 static void
873 enqueue(char *left, char divider, char *right)
874 {
875 	struct diffline *diffp;
876 
877 	if (!(diffp = malloc(sizeof(struct diffline))))
878 		err(2, "enqueue");
879 	diffp->left = left;
880 	diffp->div = divider;
881 	diffp->right = right;
882 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
883 }
884 
885 /*
886  * Free a diffline structure and its elements.
887  */
888 static void
889 freediff(struct diffline *diffp)
890 {
891 
892 	free(diffp->left);
893 	free(diffp->right);
894 	free(diffp);
895 }
896 
/*
 * Append `append' to the heap string *s, separated by a newline.  If
 * *s is NULL it simply becomes a copy of `append'.  *s may be moved
 * by the reallocation.
 *
 * The length of the string handled by the previous call is
 * remembered, so repeated appends to the same string do not have to
 * measure it again.  Exits with status 2 if memory cannot be
 * allocated.
 */
static void
astrcat(char **s, const char *append)
{
	/* String handled by the previous call, and its length. */
	static const char *cached = NULL;
	static size_t cached_len = 0;
	size_t add_len, total;
	char *buf;

	add_len = strlen(append);

	/* Nothing to append to yet: start with a copy. */
	if (*s == NULL) {
		buf = malloc(add_len + 1);
		if (buf == NULL)
			err(2, "astrcat");
		memcpy(buf, append, add_len + 1);

		*s = buf;
		cached = buf;
		cached_len = add_len;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached != *s)
		cached_len = strlen(*s);

	/* Old text + '\n' + appended text + '\0'. */
	total = cached_len + 1 + add_len + 1;
	buf = realloc(*s, total);
	if (buf == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text at the old end. */
	buf[cached_len] = '\n';
	memcpy(buf + cached_len + 1, append, add_len + 1);

	/* Remember the result for the next call. */
	*s = buf;
	cached = buf;
	cached_len = total - 1;
}
961 
962 /*
963  * Process diff set queue, printing, prompting, and saving each diff
964  * line stored in queue.
965  */
966 static void
967 processq(void)
968 {
969 	struct diffline *diffp;
970 	char divc, *left, *right;
971 
972 	/* Don't process empty queue. */
973 	if (STAILQ_EMPTY(&diffhead))
974 		return;
975 
976 	/* Remember the divider. */
977 	divc = STAILQ_FIRST(&diffhead)->div;
978 
979 	left = NULL;
980 	right = NULL;
981 	/*
982 	 * Go through set of diffs, concatenating each line in left or
983 	 * right column into two long strings, `left' and `right'.
984 	 */
985 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
986 		/*
987 		 * Print changed lines if -s was given,
988 		 * print all lines if -s was not given.
989 		 */
990 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
991 		    diffp->div == '>')
992 			println(diffp->left, diffp->div, diffp->right);
993 
994 		/* Append new lines to diff set. */
995 		if (diffp->left)
996 			astrcat(&left, diffp->left);
997 		if (diffp->right)
998 			astrcat(&right, diffp->right);
999 	}
1000 
1001 	/* Empty queue and free each diff line and its elements. */
1002 	while (!STAILQ_EMPTY(&diffhead)) {
1003 		diffp = STAILQ_FIRST(&diffhead);
1004 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1005 		freediff(diffp);
1006 	}
1007 
1008 	/* Write to outfp, prompting user if lines are different. */
1009 	if (outfp)
1010 		switch (divc) {
1011 		case ' ': case '(': case ')':
1012 			fprintf(outfp, "%s\n", left);
1013 			break;
1014 		case '|': case '<': case '>':
1015 			prompt(left, right);
1016 			break;
1017 		default:
1018 			errx(2, "invalid divider: %c", divc);
1019 		}
1020 
1021 	/* Free left and right. */
1022 	free(left);
1023 	free(right);
1024 }
1025 
1026 /*
1027  * Print lines following an (a)ppend command.
1028  */
1029 static void
1030 printa(FILE *file, size_t line2)
1031 {
1032 	char *line;
1033 
1034 	for (; file2ln <= line2; ++file2ln) {
1035 		if (!(line = xfgets(file)))
1036 			errx(2, "append ended early");
1037 		enqueue(NULL, '>', line);
1038 	}
1039 	processq();
1040 }
1041 
1042 /*
1043  * Print lines following a (c)hange command, from file1ln to file1end
1044  * and from file2ln to file2end.
1045  */
1046 static void
1047 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1048 {
1049 	struct fileline {
1050 		STAILQ_ENTRY(fileline)	 fileentries;
1051 		char			*line;
1052 	};
1053 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1054 
1055 	/* Read lines to be deleted. */
1056 	for (; file1ln <= file1end; ++file1ln) {
1057 		struct fileline *linep;
1058 		char *line1;
1059 
1060 		/* Read lines from both. */
1061 		if (!(line1 = xfgets(file1)))
1062 			errx(2, "error reading file1 in delete in change");
1063 
1064 		/* Add to delete queue. */
1065 		if (!(linep = malloc(sizeof(struct fileline))))
1066 			err(2, "printc");
1067 		linep->line = line1;
1068 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1069 	}
1070 
1071 	/* Process changed lines.. */
1072 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1073 	    ++file2ln) {
1074 		struct fileline *del;
1075 		char *add;
1076 
1077 		/* Get add line. */
1078 		if (!(add = xfgets(file2)))
1079 			errx(2, "error reading add in change");
1080 
1081 		del = STAILQ_FIRST(&delqhead);
1082 		enqueue(del->line, '|', add);
1083 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1084 		/*
1085 		 * Free fileline structure but not its elements since
1086 		 * they are queued up.
1087 		 */
1088 		free(del);
1089 	}
1090 	processq();
1091 
1092 	/* Process remaining lines to add. */
1093 	for (; file2ln <= file2end; ++file2ln) {
1094 		char *add;
1095 
1096 		/* Get add line. */
1097 		if (!(add = xfgets(file2)))
1098 			errx(2, "error reading add in change");
1099 
1100 		enqueue(NULL, '>', add);
1101 	}
1102 	processq();
1103 
1104 	/* Process remaining lines to delete. */
1105 	while (!STAILQ_EMPTY(&delqhead)) {
1106 		struct fileline *filep;
1107 
1108 		filep = STAILQ_FIRST(&delqhead);
1109 		enqueue(filep->line, '<', NULL);
1110 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1111 		free(filep);
1112 	}
1113 	processq();
1114 }
1115 
1116 /*
1117  * Print deleted lines from file, from file1ln to file1end.
1118  */
1119 static void
1120 printd(FILE *file1, size_t file1end)
1121 {
1122 	char *line1;
1123 
1124 	/* Print out lines file1ln to line2. */
1125 	for (; file1ln <= file1end; ++file1ln) {
1126 		if (!(line1 = xfgets(file1)))
1127 			errx(2, "file1 ended early in delete");
1128 		enqueue(line1, '<', NULL);
1129 	}
1130 	processq();
1131 }
1132 
/*
 * Print the list of interactive merge commands to stdout, one command
 * per line.
 */
static void
int_usage(void)
{
	static const char *const commands[] = {
		"e:\tedit blank diff",
		"eb:\tedit both diffs concatenated",
		"el:\tedit left diff",
		"er:\tedit right diff",
		"l | 1:\tchoose left diff",
		"r | 2:\tchoose right diff",
		"s:\tsilent mode--don't print identical lines",
		"v:\tverbose mode--print identical lines",
		"q:\tquit",
	};
	size_t k;

	/* puts() supplies the newline after each entry. */
	for (k = 0; k < sizeof(commands) / sizeof(commands[0]); k++)
		puts(commands[k]);
}
1150 
/*
 * Print the command-line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1160