xref: /freebsd/usr.bin/diff/diffreg_new.c (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 /*
2  * Copyright (c) 2018 Martin Pieuchot
3  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/capsicum.h>
19 #include <sys/mman.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 
23 #include <capsicum_helpers.h>
24 #include <err.h>
25 #include <fcntl.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <stdbool.h>
30 #include <time.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "diff.h"
35 #include <arraylist.h>
36 #include <diff_main.h>
37 #include <diff_output.h>
38 
39 const char *format_label(const char *, struct stat *);
40 
/*
 * Algorithm strategies diffreg_new() can select a diff_config for.
 * Enumerators are numbered consecutively from 0 in declaration order.
 */
enum diffreg_algo {
	/* Myers first, Myers divide as the fallback. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE,
	/* Myers first, Patience as the fallback. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,
	/* Patience from the start. */
	DIFFREG_ALGO_PATIENCE,
	/* Configuration without an algorithm. */
	DIFFREG_ALGO_NONE,
};
47 
48 int		 diffreg_new(char *, char *, int, int);
49 FILE *		 openfile(const char *, char **, struct stat *);
50 
51 static const struct diff_algo_config myers_then_patience;
52 static const struct diff_algo_config myers_then_myers_divide;
53 static const struct diff_algo_config patience;
54 static const struct diff_algo_config myers_divide;
55 
56 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
57 	.impl = diff_algo_myers,
58 	.permitted_state_size = 1024 * 1024 * sizeof(int),
59 	.fallback_algo = &patience,
60 };
61 
62 static const struct diff_algo_config myers_then_myers_divide =
63 	(struct diff_algo_config){
64 	.impl = diff_algo_myers,
65 	.permitted_state_size = 1024 * 1024 * sizeof(int),
66 	.fallback_algo = &myers_divide,
67 };
68 
69 static const struct diff_algo_config patience = (struct diff_algo_config){
70 	.impl = diff_algo_patience,
71 	/* After subdivision, do Patience again: */
72 	.inner_algo = &patience,
73 	/* If subdivision failed, do Myers Divide et Impera: */
74 	.fallback_algo = &myers_then_myers_divide,
75 };
76 
77 static const struct diff_algo_config myers_divide = (struct diff_algo_config){
78 	.impl = diff_algo_myers_divide,
79 	/* When division succeeded, start from the top: */
80 	.inner_algo = &myers_then_myers_divide,
81 	/* (fallback_algo = NULL implies diff_algo_none). */
82 };
83 
84 static const struct diff_algo_config no_algo = (struct diff_algo_config){
85 	.impl = diff_algo_none,
86 };
87 
88 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
89  * do a Myers-divide. */
90 static const struct diff_config diff_config_myers_then_myers_divide = {
91 	.atomize_func = diff_atomize_text_by_line,
92 	.algo = &myers_then_myers_divide,
93 };
94 
95 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
96  * do a Patience. */
97 static const struct diff_config diff_config_myers_then_patience = {
98 	.atomize_func = diff_atomize_text_by_line,
99 	.algo = &myers_then_patience,
100 };
101 
102 /* Directly force Patience as a first divider of the source file. */
103 static const struct diff_config diff_config_patience = {
104 	.atomize_func = diff_atomize_text_by_line,
105 	.algo = &patience,
106 };
107 
108 /* Directly force Patience as a first divider of the source file. */
109 static const struct diff_config diff_config_no_algo = {
110 	.atomize_func = diff_atomize_text_by_line,
111 };
112 
/*
 * Build a header label for a file: the given name, a tab, the modification
 * time with nanosecond precision and the numeric timezone offset, e.g.
 * "file.c\t2020-01-31 12:34:56.123456789 +0100".
 *
 * oldlabel: name placed at the start of the label.
 * stb: stat data supplying the modification time.
 *
 * Returns a newly allocated, NUL-terminated string (no trailing newline).
 * Exits with status 2 if the time cannot be converted or memory cannot be
 * allocated.
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	const char *time_format = "%Y-%m-%d %H:%M:%S";
	char *newlabel;
	char buf[256];
	char end[10];
	struct tm tm;
	int nsec = stb->st_mtim.tv_nsec;
	size_t newlabellen, timelen, endlen;

	/* localtime_r() may fail; never hand a NULL tm to strftime(). */
	if (localtime_r(&stb->st_mtime, &tm) == NULL)
		err(2, "%s", oldlabel);

	/* On a zero return the buffer contents are unspecified; reset them. */
	timelen = strftime(buf, sizeof(buf), time_format, &tm);
	if (timelen == 0)
		buf[0] = '\0';
	endlen = strftime(end, sizeof(end), "%z", &tm);
	if (endlen == 0)
		end[0] = '\0';

	/*
	 * The new label is the length of the old label, time and timezone,
	 * 9 characters for nanoseconds, and 4 characters for the tab, the
	 * period, the space and the terminating NUL.
	 */
	newlabellen = strlen(oldlabel) + timelen + endlen + 9 + 4;
	newlabel = calloc(newlabellen, sizeof(char));
	if (newlabel == NULL)
		err(2, "calloc");

	/*
	 * No trailing newline: the buffer never had room for one, so it was
	 * always cut off by snprintf().  Leaving it out of the format keeps
	 * the result the same without relying on truncation.
	 */
	snprintf(newlabel, newlabellen, "%s\t%s.%.9d %s",
	    oldlabel, buf, nsec, end);

	return newlabel;
}
141 
142 int
143 diffreg_new(char *file1, char *file2, int flags, int capsicum)
144 {
145 	char *str1, *str2;
146 	FILE *f1, *f2;
147 	struct stat st1, st2;
148 	struct diff_input_info info;
149 	struct diff_data left = {}, right = {};
150 	struct diff_result *result = NULL;
151 	bool force_text, have_binary;
152 	int rc, atomizer_flags, rflags, diff_flags = 0;
153 	int context_lines = diff_context;
154 	const struct diff_config *cfg;
155 	enum diffreg_algo algo;
156 	cap_rights_t rights_ro;
157 
158 	algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
159 
160 	switch (algo) {
161 	default:
162 	case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
163 		cfg = &diff_config_myers_then_myers_divide;
164 		break;
165 	case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
166 		cfg = &diff_config_myers_then_patience;
167 		break;
168 	case DIFFREG_ALGO_PATIENCE:
169 		cfg = &diff_config_patience;
170 		break;
171 	case DIFFREG_ALGO_NONE:
172 		cfg = &diff_config_no_algo;
173 		break;
174 	}
175 
176 	f1 = openfile(file1, &str1, &st1);
177 	f2 = openfile(file2, &str2, &st2);
178 
179 	if (capsicum) {
180 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
181 		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
182 			err(2, "unable to limit rights on: %s", file1);
183 		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
184 			err(2, "unable to limit rights on: %s", file2);
185 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
186 			/* stdin has already been limited */
187 			if (caph_limit_stderr() == -1)
188 				err(2, "unable to limit stderr");
189 			if (caph_limit_stdout() == -1)
190 				err(2, "unable to limit stdout");
191 		} else if (caph_limit_stdio() == -1)
192 				err(2, "unable to limit stdio");
193 		caph_cache_catpages();
194 		caph_cache_tzdata();
195 		if (caph_enter() < 0)
196 			err(2, "unable to enter capability mode");
197 	}
198 	/*
199 	 * If we have been given a label use that for the paths, if not format
200 	 * the path with the files modification time.
201 	 */
202 	info.flags = 0;
203 	info.left_path = (label[0] != NULL) ?
204 		label[0] : format_label(file1, &stb1);
205 	info.right_path = (label[1] != NULL) ?
206 		label[1] : format_label(file2, &stb2);
207 
208 	if (flags & D_FORCEASCII)
209 		diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
210 	if (flags & D_IGNOREBLANKS)
211 		diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
212 	if (flags & D_PROTOTYPE)
213 		diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
214 
215 	if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) {
216 		rc = D_ERROR;
217 		goto done;
218 	}
219 	if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) {
220 		rc = D_ERROR;
221 		goto done;
222 	}
223 
224 	result = diff_main(cfg, &left, &right);
225 	if (result->rc != DIFF_RC_OK) {
226 		rc = D_ERROR;
227 		status |= 2;
228 		goto done;
229 	}
230 	/*
231 	 * If there wasn't an error, but we don't have any printable chunks
232 	 * then the files must match.
233 	 */
234 	if (!diff_result_contains_printable_chunks(result)) {
235 		rc = D_SAME;
236 		goto done;
237 	}
238 
239 	atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
240 	rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
241 	force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
242 	have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
243 
244 	if (have_binary && !force_text) {
245 		rc = D_BINARY;
246 		status |= 1;
247 		goto done;
248 	}
249 
250 	if (diff_format == D_NORMAL) {
251 		rc = diff_output_plain(NULL, stdout, &info, result, false);
252 	} else if (diff_format == D_EDIT) {
253 		rc = diff_output_edscript(NULL, stdout, &info, result);
254 	} else {
255 		rc = diff_output_unidiff(NULL, stdout, &info, result,
256 		    context_lines);
257 	}
258 	if (rc != DIFF_RC_OK) {
259 		rc = D_ERROR;
260 		status |= 2;
261 	} else {
262 		rc = D_DIFFER;
263 		status |= 1;
264 	}
265 done:
266 	diff_result_free(result);
267 	diff_data_free(&left);
268 	diff_data_free(&right);
269 	if (str1)
270 		munmap(str1, st1.st_size);
271 	if (str2)
272 		munmap(str2, st2.st_size);
273 	fclose(f1);
274 	fclose(f2);
275 
276 	return rc;
277 }
278 
/*
 * Open an input file for comparison.
 *
 * path: file to open for reading; "-" selects stdin.
 * p: receives a read-only private mapping of the file, or NULL when no
 *     mapping is available and the caller has to read through the stream.
 * st: filled in with fstat() data for the opened stream.
 *
 * Returns the open stream.  Exits with status 2 if the file cannot be
 * opened or examined.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *stream;
	int fd;

	stream = (strcmp(path, "-") != 0) ? fopen(path, "r") : stdin;
	if (stream == NULL)
		err(2, "%s", path);

	fd = fileno(stream);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

	/* Default to plain file I/O; a successful mapping replaces this. */
	*p = NULL;
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return stream;
}
303 
304 bool
305 can_libdiff(int flags)
306 {
307 	/* We can't use fifos with libdiff yet */
308 	if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode))
309 		return false;
310 
311 	/* Is this one of the supported input/output modes for diffreg_new? */
312 	if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
313 		ignore_pats == NULL && (
314 		diff_format == D_NORMAL ||
315 #if 0
316 		diff_format == D_EDIT ||
317 #endif
318 		diff_format == D_UNIFIED) &&
319 		(diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
320 		return true;
321 	}
322 
323 	/* Fallback to using stone. */
324 	return false;
325 }
326