xref: /freebsd/usr.bin/diff/diffreg_new.c (revision 55f160fb07eaee977c89bdee7bdd83f4d21f5adf)
1 /*
2  * Copyright (c) 2018 Martin Pieuchot
3  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/types.h>
19 #include <sys/capsicum.h>
20 #ifndef DIFF_NO_MMAP
21 #include <sys/mman.h>
22 #endif
23 #include <sys/stat.h>
24 
25 #include <capsicum_helpers.h>
26 #include <err.h>
27 #include <fcntl.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35 
36 #include "pr.h"
37 #include "diff.h"
38 #include <arraylist.h>
39 #include <diff_main.h>
40 #include <diff_output.h>
41 
/* Build a "path<TAB>mtime" header label from a path and its stat data. */
const char *format_label(const char *, struct stat *);

/*
 * Selects which libdiff configuration diffreg_new() uses; each value maps to
 * one of the diff_config_* objects defined below.
 */
enum diffreg_algo {
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
	DIFFREG_ALGO_PATIENCE = 2,
	DIFFREG_ALGO_NONE = 3,
};

/* Diff two files with libdiff and print the result to stdout. */
int		 diffreg_new(char *, char *, int, int);
/* Open a path ("-" means stdin), stat it and try to mmap its contents. */
FILE *		 openfile(const char *, char **, struct stat *);
53 
54 static const struct diff_algo_config myers_then_patience;
55 static const struct diff_algo_config myers_then_myers_divide;
56 static const struct diff_algo_config patience;
57 static const struct diff_algo_config myers_divide;
58 
59 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
60 	.impl = diff_algo_myers,
61 	.permitted_state_size = 1024 * 1024 * sizeof(int),
62 	.fallback_algo = &patience,
63 };
64 
65 static const struct diff_algo_config myers_then_myers_divide =
66 	(struct diff_algo_config){
67 	.impl = diff_algo_myers,
68 	.permitted_state_size = 1024 * 1024 * sizeof(int),
69 	.fallback_algo = &myers_divide,
70 };
71 
72 static const struct diff_algo_config patience = (struct diff_algo_config){
73 	.impl = diff_algo_patience,
74 	/* After subdivision, do Patience again: */
75 	.inner_algo = &patience,
76 	/* If subdivision failed, do Myers Divide et Impera: */
77 	.fallback_algo = &myers_then_myers_divide,
78 };
79 
80 static const struct diff_algo_config myers_divide = (struct diff_algo_config){
81 	.impl = diff_algo_myers_divide,
82 	/* When division succeeded, start from the top: */
83 	.inner_algo = &myers_then_myers_divide,
84 	/* (fallback_algo = NULL implies diff_algo_none). */
85 };
86 
87 static const struct diff_algo_config no_algo = (struct diff_algo_config){
88 	.impl = diff_algo_none,
89 };
90 
/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Myers-divide. */
static const struct diff_config diff_config_myers_then_myers_divide = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_myers_divide,
};

/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Patience. */
static const struct diff_config diff_config_myers_then_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_patience,
};

/* Directly force Patience as a first divider of the source file. */
static const struct diff_config diff_config_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &patience,
};

/*
 * Atomize by line but select no diff algorithm: .algo is left unset (NULL).
 * NOTE(review): presumably libdiff treats a NULL algo like diff_algo_none --
 * confirm against diff_main().
 */
static const struct diff_config diff_config_no_algo = {
	.atomize_func = diff_atomize_text_by_line,
};
115 
/*
 * Build the default header label for a file: the given path, a tab, and the
 * modification time taken from stb, formatted in local time as
 * "YYYY-MM-DD HH:MM:SS.nnnnnnnnn +zzzz" (no trailing newline).
 *
 * Returns a newly allocated string; the caller owns it and may free() it.
 * Exits with status 2 if the allocation fails.
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	const char *time_format = "%Y-%m-%d %H:%M:%S";
	char *newlabel;
	char buf[256];
	char end[10];
	struct tm tm, *tm_ptr;
	int nsec = stb->st_mtim.tv_nsec;
	size_t newlabellen, timelen = 0, endlen = 0;

	/* localtime_r() may fail for unrepresentable times; never hand NULL
	 * to strftime(). */
	tm_ptr = localtime_r(&stb->st_mtime, &tm);
	if (tm_ptr != NULL) {
		timelen = strftime(buf, sizeof(buf), time_format, tm_ptr);
		endlen = strftime(end, sizeof(end), "%z", tm_ptr);
	}
	/* A zero return leaves the buffer contents unspecified. */
	if (timelen == 0)
		buf[0] = '\0';
	if (endlen == 0)
		end[0] = '\0';

	/*
	 * The new label is the length of the old label, time and timezone,
	 * 9 characters for nanoseconds, 3 separator characters (tab, period
	 * and space) and the terminating NUL.  This is the same size the
	 * buffer always had; the newline that used to end the format string
	 * never fit and was always truncated, so it is no longer requested.
	 */
	newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 3 + 1;
	newlabel = calloc(newlabellen, sizeof(char));
	if (newlabel == NULL)
		err(2, "calloc");

	snprintf(newlabel, newlabellen, "%s\t%s.%.9d %s",
		oldlabel, buf, nsec, end);

	return newlabel;
}
144 
145 int
diffreg_new(char * file1,char * file2,int flags,int capsicum)146 diffreg_new(char *file1, char *file2, int flags, int capsicum)
147 {
148 	char *str1, *str2;
149 	FILE *f1, *f2;
150 	struct pr *pr = NULL;
151 	struct stat st1, st2;
152 	struct diff_input_info info;
153 	struct diff_data left = {}, right = {};
154 	struct diff_result *result = NULL;
155 	bool force_text, have_binary;
156 	int rc, atomizer_flags, rflags, diff_flags = 0;
157 	int context_lines = diff_context;
158 	const struct diff_config *cfg;
159 	enum diffreg_algo algo;
160 	cap_rights_t rights_ro;
161 	int ret;
162 
163 	algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
164 
165 	switch (algo) {
166 	default:
167 	case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
168 		cfg = &diff_config_myers_then_myers_divide;
169 		break;
170 	case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
171 		cfg = &diff_config_myers_then_patience;
172 		break;
173 	case DIFFREG_ALGO_PATIENCE:
174 		cfg = &diff_config_patience;
175 		break;
176 	case DIFFREG_ALGO_NONE:
177 		cfg = &diff_config_no_algo;
178 		break;
179 	}
180 
181 	f1 = openfile(file1, &str1, &st1);
182 	f2 = openfile(file2, &str2, &st2);
183 
184 	if (flags & D_PAGINATION)
185 		pr = start_pr(file1, file2);
186 
187 	if (capsicum) {
188 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
189 		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
190 			err(2, "unable to limit rights on: %s", file1);
191 		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
192 			err(2, "unable to limit rights on: %s", file2);
193 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
194 			/* stdin has already been limited */
195 			if (caph_limit_stderr() == -1)
196 				err(2, "unable to limit stderr");
197 			if (caph_limit_stdout() == -1)
198 				err(2, "unable to limit stdout");
199 		} else if (caph_limit_stdio() == -1)
200 				err(2, "unable to limit stdio");
201 		caph_cache_catpages();
202 		caph_cache_tzdata();
203 		if (caph_enter() < 0)
204 			err(2, "unable to enter capability mode");
205 	}
206 	/*
207 	 * If we have been given a label use that for the paths, if not format
208 	 * the path with the files modification time.
209 	 */
210 	info.flags = 0;
211 	info.left_path = (label[0] != NULL) ?
212 		label[0] : format_label(file1, &stb1);
213 	info.right_path = (label[1] != NULL) ?
214 		label[1] : format_label(file2, &stb2);
215 
216 	if (flags & D_FORCEASCII)
217 		diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
218 	if (flags & D_IGNOREBLANKS)
219 		diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
220 	if (flags & D_PROTOTYPE)
221 		diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
222 
223 	ret = diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size,
224 	    diff_flags);
225 	if (ret != DIFF_RC_OK) {
226 		warnc(ret, "%s", file1);
227 		rc = D_ERROR;
228 		status |= 2;
229 		goto done;
230 	}
231 	ret = diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size,
232 	    diff_flags);
233 	if (ret != DIFF_RC_OK) {
234 		warnc(ret, "%s", file2);
235 		rc = D_ERROR;
236 		status |= 2;
237 		goto done;
238 	}
239 
240 	result = diff_main(cfg, &left, &right);
241 	if (result->rc != DIFF_RC_OK) {
242 		rc = D_ERROR;
243 		status |= 2;
244 		goto done;
245 	}
246 	/*
247 	 * If there wasn't an error, but we don't have any printable chunks
248 	 * then the files must match.
249 	 */
250 	if (!diff_result_contains_printable_chunks(result)) {
251 		rc = D_SAME;
252 		goto done;
253 	}
254 
255 	atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
256 	rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
257 	force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
258 	have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
259 
260 	if (have_binary && !force_text) {
261 		rc = D_BINARY;
262 		status |= 1;
263 		goto done;
264 	}
265 
266 	if (color)
267 		diff_output_set_colors(color, del_code, add_code);
268 	if (diff_format == D_NORMAL) {
269 		rc = diff_output_plain(NULL, stdout, &info, result, false);
270 	} else if (diff_format == D_EDIT) {
271 		rc = diff_output_edscript(NULL, stdout, &info, result);
272 	} else {
273 		rc = diff_output_unidiff(NULL, stdout, &info, result,
274 		    context_lines);
275 	}
276 	if (rc != DIFF_RC_OK) {
277 		rc = D_ERROR;
278 		status |= 2;
279 	} else {
280 		rc = D_DIFFER;
281 		status |= 1;
282 	}
283 done:
284 	if (pr != NULL)
285 		stop_pr(pr);
286 	diff_result_free(result);
287 	diff_data_free(&left);
288 	diff_data_free(&right);
289 #ifndef DIFF_NO_MMAP
290 	if (str1)
291 		munmap(str1, st1.st_size);
292 	if (str2)
293 		munmap(str2, st2.st_size);
294 #endif
295 	fclose(f1);
296 	fclose(f2);
297 
298 	return rc;
299 }
300 
/*
 * Open path for reading ("-" selects stdin), fill *st with its fstat() data
 * and, unless DIFF_NO_MMAP is defined, try to map the whole file read-only.
 *
 * On return *p is the mapping, or NULL when mapping is unavailable or failed,
 * in which case the caller falls back on file I/O through the returned
 * stream.  Failure to open or stat the file is fatal (exit status 2).
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;
	int fd;

	fp = (strcmp(path, "-") != 0) ? fopen(path, "r") : stdin;
	if (fp == NULL)
		err(2, "%s", path);

	fd = fileno(fp);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

#ifdef DIFF_NO_MMAP
	*p = NULL; /* fall back on file I/O */
#else
	*p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (*p == MAP_FAILED)
		*p = NULL; /* fall back on file I/O */
#endif

	return fp;
}
325 
326 bool
can_libdiff(int flags)327 can_libdiff(int flags)
328 {
329 	/* libdiff's atomizer can only deal with files */
330 	if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode))
331 		return false;
332 
333 	/* Is this one of the supported input/output modes for diffreg_new? */
334 	if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
335 		ignore_pats == NULL && (
336 		diff_format == D_NORMAL ||
337 #if 0
338 		diff_format == D_EDIT ||
339 #endif
340 		diff_format == D_UNIFIED) &&
341 		(diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
342 		return true;
343 	}
344 
345 	/* Fallback to using stone. */
346 	return false;
347 }
348