xref: /freebsd/contrib/libdiff/diff/diff.c (revision 59c8e88e72633afbc47a4ace0d2170d00d51f7dc)
1*59c8e88eSDag-Erling Smørgrav /* Commandline diff utility to test diff implementations. */
2*59c8e88eSDag-Erling Smørgrav /*
3*59c8e88eSDag-Erling Smørgrav  * Copyright (c) 2018 Martin Pieuchot
4*59c8e88eSDag-Erling Smørgrav  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
5*59c8e88eSDag-Erling Smørgrav  *
6*59c8e88eSDag-Erling Smørgrav  * Permission to use, copy, modify, and distribute this software for any
7*59c8e88eSDag-Erling Smørgrav  * purpose with or without fee is hereby granted, provided that the above
8*59c8e88eSDag-Erling Smørgrav  * copyright notice and this permission notice appear in all copies.
9*59c8e88eSDag-Erling Smørgrav  *
10*59c8e88eSDag-Erling Smørgrav  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11*59c8e88eSDag-Erling Smørgrav  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12*59c8e88eSDag-Erling Smørgrav  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13*59c8e88eSDag-Erling Smørgrav  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14*59c8e88eSDag-Erling Smørgrav  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15*59c8e88eSDag-Erling Smørgrav  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16*59c8e88eSDag-Erling Smørgrav  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*59c8e88eSDag-Erling Smørgrav  */
18*59c8e88eSDag-Erling Smørgrav 
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arraylist.h>
#include <diff_main.h>
#include <diff_output.h>
35*59c8e88eSDag-Erling Smørgrav 
/*
 * Diff strategy requested on the command line; diffreg() maps each value to
 * one of the diff_config_* objects defined in this file.  The enumerators
 * count up from zero in declaration order.
 */
enum diffreg_algo {
	/* Default when no algorithm option is given. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE,
	/* Chosen by -Q. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,
	/* Chosen by -P. */
	DIFFREG_ALGO_PATIENCE,
	/* Chosen by -T. */
	DIFFREG_ALGO_NONE
};
42*59c8e88eSDag-Erling Smørgrav 
43*59c8e88eSDag-Erling Smørgrav __dead void	 usage(void);
44*59c8e88eSDag-Erling Smørgrav int		 diffreg(char *, char *, enum diffreg_algo, bool, bool, bool,
45*59c8e88eSDag-Erling Smørgrav 			 int, bool);
46*59c8e88eSDag-Erling Smørgrav FILE *		 openfile(const char *, char **, struct stat *);
47*59c8e88eSDag-Erling Smørgrav 
48*59c8e88eSDag-Erling Smørgrav __dead void
49*59c8e88eSDag-Erling Smørgrav usage(void)
50*59c8e88eSDag-Erling Smørgrav {
51*59c8e88eSDag-Erling Smørgrav 	fprintf(stderr,
52*59c8e88eSDag-Erling Smørgrav 		"usage: %s [-apPQTwe] [-U n] file1 file2\n"
53*59c8e88eSDag-Erling Smørgrav 		"\n"
54*59c8e88eSDag-Erling Smørgrav 		"  -a   Treat input as ASCII even if binary data is detected\n"
55*59c8e88eSDag-Erling Smørgrav 		"  -p   Show function prototypes in hunk headers\n"
56*59c8e88eSDag-Erling Smørgrav 		"  -P   Use Patience Diff (slower but often nicer)\n"
57*59c8e88eSDag-Erling Smørgrav 		"  -Q   Use forward-Myers for small files, otherwise Patience\n"
58*59c8e88eSDag-Erling Smørgrav 		"  -T   Trivial algo: detect similar start and end only\n"
59*59c8e88eSDag-Erling Smørgrav 		"  -w   Ignore Whitespace\n"
60*59c8e88eSDag-Erling Smørgrav 		"  -U n Number of Context Lines\n"
61*59c8e88eSDag-Erling Smørgrav 		"  -e   Produce ed script output\n"
62*59c8e88eSDag-Erling Smørgrav 		, getprogname());
63*59c8e88eSDag-Erling Smørgrav 	exit(1);
64*59c8e88eSDag-Erling Smørgrav }
65*59c8e88eSDag-Erling Smørgrav 
66*59c8e88eSDag-Erling Smørgrav int
67*59c8e88eSDag-Erling Smørgrav main(int argc, char *argv[])
68*59c8e88eSDag-Erling Smørgrav {
69*59c8e88eSDag-Erling Smørgrav 	int ch, rc;
70*59c8e88eSDag-Erling Smørgrav 	bool force_text = false;
71*59c8e88eSDag-Erling Smørgrav 	bool ignore_whitespace = false;
72*59c8e88eSDag-Erling Smørgrav 	bool show_function_prototypes = false;
73*59c8e88eSDag-Erling Smørgrav 	bool edscript = false;
74*59c8e88eSDag-Erling Smørgrav 	int context_lines = 3;
75*59c8e88eSDag-Erling Smørgrav 	enum diffreg_algo algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
76*59c8e88eSDag-Erling Smørgrav 
77*59c8e88eSDag-Erling Smørgrav 	while ((ch = getopt(argc, argv, "apPQTwU:e")) != -1) {
78*59c8e88eSDag-Erling Smørgrav 		switch (ch) {
79*59c8e88eSDag-Erling Smørgrav 		case 'a':
80*59c8e88eSDag-Erling Smørgrav 			force_text = true;
81*59c8e88eSDag-Erling Smørgrav 			break;
82*59c8e88eSDag-Erling Smørgrav 		case 'p':
83*59c8e88eSDag-Erling Smørgrav 			show_function_prototypes = true;
84*59c8e88eSDag-Erling Smørgrav 			break;
85*59c8e88eSDag-Erling Smørgrav 		case 'P':
86*59c8e88eSDag-Erling Smørgrav 			algo = DIFFREG_ALGO_PATIENCE;
87*59c8e88eSDag-Erling Smørgrav 			break;
88*59c8e88eSDag-Erling Smørgrav 		case 'Q':
89*59c8e88eSDag-Erling Smørgrav 			algo = DIFFREG_ALGO_MYERS_THEN_PATIENCE;
90*59c8e88eSDag-Erling Smørgrav 			break;
91*59c8e88eSDag-Erling Smørgrav 		case 'T':
92*59c8e88eSDag-Erling Smørgrav 			algo = DIFFREG_ALGO_NONE;
93*59c8e88eSDag-Erling Smørgrav 			break;
94*59c8e88eSDag-Erling Smørgrav 		case 'w':
95*59c8e88eSDag-Erling Smørgrav 			ignore_whitespace = true;
96*59c8e88eSDag-Erling Smørgrav 			break;
97*59c8e88eSDag-Erling Smørgrav 		case 'U':
98*59c8e88eSDag-Erling Smørgrav 			context_lines = atoi(optarg);
99*59c8e88eSDag-Erling Smørgrav 			break;
100*59c8e88eSDag-Erling Smørgrav 		case 'e':
101*59c8e88eSDag-Erling Smørgrav 			edscript = true;
102*59c8e88eSDag-Erling Smørgrav 			break;
103*59c8e88eSDag-Erling Smørgrav 		default:
104*59c8e88eSDag-Erling Smørgrav 			usage();
105*59c8e88eSDag-Erling Smørgrav 		}
106*59c8e88eSDag-Erling Smørgrav 	}
107*59c8e88eSDag-Erling Smørgrav 
108*59c8e88eSDag-Erling Smørgrav 	argc -= optind;
109*59c8e88eSDag-Erling Smørgrav 	argv += optind;
110*59c8e88eSDag-Erling Smørgrav 
111*59c8e88eSDag-Erling Smørgrav 	if (argc != 2)
112*59c8e88eSDag-Erling Smørgrav 		usage();
113*59c8e88eSDag-Erling Smørgrav 
114*59c8e88eSDag-Erling Smørgrav 	rc = diffreg(argv[0], argv[1], algo, force_text, ignore_whitespace,
115*59c8e88eSDag-Erling Smørgrav 	    show_function_prototypes, context_lines, edscript);
116*59c8e88eSDag-Erling Smørgrav 	if (rc != DIFF_RC_OK) {
117*59c8e88eSDag-Erling Smørgrav 		fprintf(stderr, "diff: %s\n", strerror(rc));
118*59c8e88eSDag-Erling Smørgrav 		return 1;
119*59c8e88eSDag-Erling Smørgrav 	}
120*59c8e88eSDag-Erling Smørgrav 	return 0;
121*59c8e88eSDag-Erling Smørgrav }
122*59c8e88eSDag-Erling Smørgrav 
123*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience;
124*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide;
125*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience;
126*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide;
127*59c8e88eSDag-Erling Smørgrav 
128*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
129*59c8e88eSDag-Erling Smørgrav 	.impl = diff_algo_myers,
130*59c8e88eSDag-Erling Smørgrav 	.permitted_state_size = 1024 * 1024 * sizeof(int),
131*59c8e88eSDag-Erling Smørgrav 	.fallback_algo = &patience,
132*59c8e88eSDag-Erling Smørgrav };
133*59c8e88eSDag-Erling Smørgrav 
134*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide =
135*59c8e88eSDag-Erling Smørgrav 	(struct diff_algo_config){
136*59c8e88eSDag-Erling Smørgrav 	.impl = diff_algo_myers,
137*59c8e88eSDag-Erling Smørgrav 	.permitted_state_size = 1024 * 1024 * sizeof(int),
138*59c8e88eSDag-Erling Smørgrav 	.fallback_algo = &myers_divide,
139*59c8e88eSDag-Erling Smørgrav };
140*59c8e88eSDag-Erling Smørgrav 
141*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience = (struct diff_algo_config){
142*59c8e88eSDag-Erling Smørgrav 	.impl = diff_algo_patience,
143*59c8e88eSDag-Erling Smørgrav 	/* After subdivision, do Patience again: */
144*59c8e88eSDag-Erling Smørgrav 	.inner_algo = &patience,
145*59c8e88eSDag-Erling Smørgrav 	/* If subdivision failed, do Myers Divide et Impera: */
146*59c8e88eSDag-Erling Smørgrav 	.fallback_algo = &myers_then_myers_divide,
147*59c8e88eSDag-Erling Smørgrav };
148*59c8e88eSDag-Erling Smørgrav 
149*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide = (struct diff_algo_config){
150*59c8e88eSDag-Erling Smørgrav 	.impl = diff_algo_myers_divide,
151*59c8e88eSDag-Erling Smørgrav 	/* When division succeeded, start from the top: */
152*59c8e88eSDag-Erling Smørgrav 	.inner_algo = &myers_then_myers_divide,
153*59c8e88eSDag-Erling Smørgrav 	/* (fallback_algo = NULL implies diff_algo_none). */
154*59c8e88eSDag-Erling Smørgrav };
155*59c8e88eSDag-Erling Smørgrav 
156*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config no_algo = (struct diff_algo_config){
157*59c8e88eSDag-Erling Smørgrav 	.impl = diff_algo_none,
158*59c8e88eSDag-Erling Smørgrav };
159*59c8e88eSDag-Erling Smørgrav 
160*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
161*59c8e88eSDag-Erling Smørgrav  * do a Myers-divide. */
162*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_myers_divide = {
163*59c8e88eSDag-Erling Smørgrav 	.atomize_func = diff_atomize_text_by_line,
164*59c8e88eSDag-Erling Smørgrav 	.algo = &myers_then_myers_divide,
165*59c8e88eSDag-Erling Smørgrav };
166*59c8e88eSDag-Erling Smørgrav 
167*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
168*59c8e88eSDag-Erling Smørgrav  * do a Patience. */
169*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_patience = {
170*59c8e88eSDag-Erling Smørgrav 	.atomize_func = diff_atomize_text_by_line,
171*59c8e88eSDag-Erling Smørgrav 	.algo = &myers_then_patience,
172*59c8e88eSDag-Erling Smørgrav };
173*59c8e88eSDag-Erling Smørgrav 
174*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
175*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_patience = {
176*59c8e88eSDag-Erling Smørgrav 	.atomize_func = diff_atomize_text_by_line,
177*59c8e88eSDag-Erling Smørgrav 	.algo = &patience,
178*59c8e88eSDag-Erling Smørgrav };
179*59c8e88eSDag-Erling Smørgrav 
180*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
181*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_no_algo = {
182*59c8e88eSDag-Erling Smørgrav 	.atomize_func = diff_atomize_text_by_line,
183*59c8e88eSDag-Erling Smørgrav };
184*59c8e88eSDag-Erling Smørgrav 
185*59c8e88eSDag-Erling Smørgrav int
186*59c8e88eSDag-Erling Smørgrav diffreg(char *file1, char *file2, enum diffreg_algo algo, bool force_text,
187*59c8e88eSDag-Erling Smørgrav     bool ignore_whitespace, bool show_function_prototypes, int context_lines,
188*59c8e88eSDag-Erling Smørgrav     bool edscript)
189*59c8e88eSDag-Erling Smørgrav {
190*59c8e88eSDag-Erling Smørgrav 	char *str1, *str2;
191*59c8e88eSDag-Erling Smørgrav 	FILE *f1, *f2;
192*59c8e88eSDag-Erling Smørgrav 	struct stat st1, st2;
193*59c8e88eSDag-Erling Smørgrav 	struct diff_input_info info = {
194*59c8e88eSDag-Erling Smørgrav 		.left_path = file1,
195*59c8e88eSDag-Erling Smørgrav 		.right_path = file2,
196*59c8e88eSDag-Erling Smørgrav 	};
197*59c8e88eSDag-Erling Smørgrav 	struct diff_data left = {}, right = {};
198*59c8e88eSDag-Erling Smørgrav 	struct diff_result *result = NULL;
199*59c8e88eSDag-Erling Smørgrav 	int rc;
200*59c8e88eSDag-Erling Smørgrav 	const struct diff_config *cfg;
201*59c8e88eSDag-Erling Smørgrav 	int diff_flags = 0;
202*59c8e88eSDag-Erling Smørgrav 
203*59c8e88eSDag-Erling Smørgrav 	switch (algo) {
204*59c8e88eSDag-Erling Smørgrav 	default:
205*59c8e88eSDag-Erling Smørgrav 	case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
206*59c8e88eSDag-Erling Smørgrav 		cfg = &diff_config_myers_then_myers_divide;
207*59c8e88eSDag-Erling Smørgrav 		break;
208*59c8e88eSDag-Erling Smørgrav 	case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
209*59c8e88eSDag-Erling Smørgrav 		cfg = &diff_config_myers_then_patience;
210*59c8e88eSDag-Erling Smørgrav 		break;
211*59c8e88eSDag-Erling Smørgrav 	case DIFFREG_ALGO_PATIENCE:
212*59c8e88eSDag-Erling Smørgrav 		cfg = &diff_config_patience;
213*59c8e88eSDag-Erling Smørgrav 		break;
214*59c8e88eSDag-Erling Smørgrav 	case DIFFREG_ALGO_NONE:
215*59c8e88eSDag-Erling Smørgrav 		cfg = &diff_config_no_algo;
216*59c8e88eSDag-Erling Smørgrav 		break;
217*59c8e88eSDag-Erling Smørgrav 	}
218*59c8e88eSDag-Erling Smørgrav 
219*59c8e88eSDag-Erling Smørgrav 	f1 = openfile(file1, &str1, &st1);
220*59c8e88eSDag-Erling Smørgrav 	f2 = openfile(file2, &str2, &st2);
221*59c8e88eSDag-Erling Smørgrav 
222*59c8e88eSDag-Erling Smørgrav 	if (force_text)
223*59c8e88eSDag-Erling Smørgrav 		diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
224*59c8e88eSDag-Erling Smørgrav 	if (ignore_whitespace)
225*59c8e88eSDag-Erling Smørgrav 		diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
226*59c8e88eSDag-Erling Smørgrav 	if (show_function_prototypes)
227*59c8e88eSDag-Erling Smørgrav 		diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
228*59c8e88eSDag-Erling Smørgrav 
229*59c8e88eSDag-Erling Smørgrav 	rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags);
230*59c8e88eSDag-Erling Smørgrav 	if (rc)
231*59c8e88eSDag-Erling Smørgrav 		goto done;
232*59c8e88eSDag-Erling Smørgrav 	rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags);
233*59c8e88eSDag-Erling Smørgrav 	if (rc)
234*59c8e88eSDag-Erling Smørgrav 		goto done;
235*59c8e88eSDag-Erling Smørgrav 
236*59c8e88eSDag-Erling Smørgrav 	result = diff_main(cfg, &left, &right);
237*59c8e88eSDag-Erling Smørgrav #if 0
238*59c8e88eSDag-Erling Smørgrav 	rc = diff_output_plain(stdout, &info, result);
239*59c8e88eSDag-Erling Smørgrav #else
240*59c8e88eSDag-Erling Smørgrav 	if (edscript)
241*59c8e88eSDag-Erling Smørgrav 		rc = diff_output_edscript(NULL, stdout, &info, result);
242*59c8e88eSDag-Erling Smørgrav 	else {
243*59c8e88eSDag-Erling Smørgrav 		rc = diff_output_unidiff(NULL, stdout, &info, result,
244*59c8e88eSDag-Erling Smørgrav 		    context_lines);
245*59c8e88eSDag-Erling Smørgrav 	}
246*59c8e88eSDag-Erling Smørgrav #endif
247*59c8e88eSDag-Erling Smørgrav done:
248*59c8e88eSDag-Erling Smørgrav 	diff_result_free(result);
249*59c8e88eSDag-Erling Smørgrav 	diff_data_free(&left);
250*59c8e88eSDag-Erling Smørgrav 	diff_data_free(&right);
251*59c8e88eSDag-Erling Smørgrav 	if (str1)
252*59c8e88eSDag-Erling Smørgrav 		munmap(str1, st1.st_size);
253*59c8e88eSDag-Erling Smørgrav 	if (str2)
254*59c8e88eSDag-Erling Smørgrav 		munmap(str2, st2.st_size);
255*59c8e88eSDag-Erling Smørgrav 	fclose(f1);
256*59c8e88eSDag-Erling Smørgrav 	fclose(f2);
257*59c8e88eSDag-Erling Smørgrav 
258*59c8e88eSDag-Erling Smørgrav 	return rc;
259*59c8e88eSDag-Erling Smørgrav }
260*59c8e88eSDag-Erling Smørgrav 
/*
 * Open path for reading, fill *st via fstat() and, unless DIFF_NO_MMAP is
 * defined, try to map the whole file read-only.
 *
 * On return *p is the mapping, or NULL when mapping was skipped or failed,
 * in which case the caller has to read through the returned stream.
 * Failure of fopen() or fstat() ends the process via err() with status 2.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *stream;

	stream = fopen(path, "r");
	if (stream == NULL)
		err(2, "%s", path);

	if (fstat(fileno(stream), st) == -1)
		err(2, "%s", path);

#ifdef DIFF_NO_MMAP
	*p = NULL; /* mapping compiled out: always use file I/O */
#else
	*p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(stream),
	    0);
	if (*p == MAP_FAILED)
		*p = NULL; /* fall back on file I/O */
#endif

	return stream;
}
281