1 /*
2 * Copyright (c) 2018 Martin Pieuchot
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/types.h>
19 #include <sys/capsicum.h>
20 #ifndef DIFF_NO_MMAP
21 #include <sys/mman.h>
22 #endif
23 #include <sys/stat.h>
24
25 #include <capsicum_helpers.h>
26 #include <err.h>
27 #include <fcntl.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35
36 #include "pr.h"
37 #include "diff.h"
38 #include <arraylist.h>
39 #include <diff_main.h>
40 #include <diff_output.h>
41
/* Build the default header label for a file from its name and stat data. */
const char	*format_label(const char *oldlabel, struct stat *stb);

/* Algorithm chains that diffreg_new() can select a configuration for. */
enum diffreg_algo {
	/* Myers first, Myers-divide as the fallback. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	/* Myers first, Patience as the fallback. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
	/* Patience right away. */
	DIFFREG_ALGO_PATIENCE = 2,
	/* No algorithm configured. */
	DIFFREG_ALGO_NONE = 3,
};

/* Diff two files with libdiff and print the result to stdout. */
int	 diffreg_new(char *file1, char *file2, int flags, int capsicum);
/* Open a path ("-" is stdin), stat it and try to map it into memory. */
FILE	*openfile(const char *path, char **p, struct stat *st);
53
54 static const struct diff_algo_config myers_then_patience;
55 static const struct diff_algo_config myers_then_myers_divide;
56 static const struct diff_algo_config patience;
57 static const struct diff_algo_config myers_divide;
58
59 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
60 .impl = diff_algo_myers,
61 .permitted_state_size = 1024 * 1024 * sizeof(int),
62 .fallback_algo = &patience,
63 };
64
65 static const struct diff_algo_config myers_then_myers_divide =
66 (struct diff_algo_config){
67 .impl = diff_algo_myers,
68 .permitted_state_size = 1024 * 1024 * sizeof(int),
69 .fallback_algo = &myers_divide,
70 };
71
72 static const struct diff_algo_config patience = (struct diff_algo_config){
73 .impl = diff_algo_patience,
74 /* After subdivision, do Patience again: */
75 .inner_algo = &patience,
76 /* If subdivision failed, do Myers Divide et Impera: */
77 .fallback_algo = &myers_then_myers_divide,
78 };
79
80 static const struct diff_algo_config myers_divide = (struct diff_algo_config){
81 .impl = diff_algo_myers_divide,
82 /* When division succeeded, start from the top: */
83 .inner_algo = &myers_then_myers_divide,
84 /* (fallback_algo = NULL implies diff_algo_none). */
85 };
86
87 static const struct diff_algo_config no_algo = (struct diff_algo_config){
88 .impl = diff_algo_none,
89 };
90
/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Myers-divide. */
static const struct diff_config diff_config_myers_then_myers_divide = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_myers_divide,
};

/* If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Patience. */
static const struct diff_config diff_config_myers_then_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_patience,
};

/* Directly force Patience as a first divider of the source file. */
static const struct diff_config diff_config_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &patience,
};

/*
 * No algorithm selected: only the line atomizer is set and .algo is left
 * unset (NULL).
 * NOTE(review): presumably libdiff then falls back to diff_algo_none --
 * confirm against diff_main().
 */
static const struct diff_config diff_config_no_algo = {
	.atomize_func = diff_atomize_text_by_line,
};
115
/*
 * Build the default header label for a file: its name, a tab, the
 * modification time in local time with nanoseconds, and the UTC offset,
 * for example "file.c\t2024-01-02 03:04:05.000000006 +0100".
 *
 * oldlabel	name to put in front of the timestamp
 * stb		stat data supplying st_mtime and st_mtim.tv_nsec
 *
 * Returns a string allocated with calloc(); this function keeps no
 * reference to it.  The label carries no trailing newline.  A failing time
 * conversion or allocation is fatal and exits with status 2 through err().
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	const char *time_format = "%Y-%m-%d %H:%M:%S";
	char *newlabel;
	char buf[256];
	char end[10];
	struct tm tm;
	int nsec = stb->st_mtim.tv_nsec;
	size_t newlabellen, timelen, endlen;

	/* strftime() must never be handed a NULL struct tm. */
	if (localtime_r(&stb->st_mtime, &tm) == NULL)
		err(2, "localtime_r");

	/*
	 * When strftime() returns 0 the buffer contents are unspecified, so
	 * make sure both pieces are valid (empty) strings in that case.
	 */
	timelen = strftime(buf, sizeof(buf), time_format, &tm);
	if (timelen == 0)
		buf[0] = '\0';
	endlen = strftime(end, sizeof(end), "%z", &tm);
	if (endlen == 0)
		end[0] = '\0';

	/*
	 * The new label is the length of the old label, time and timezone,
	 * 9 characters for nanoseconds, and 4 bytes for the tab, the period,
	 * the space and the terminating NUL.
	 */
	newlabellen = strlen(oldlabel) + timelen + endlen + 9 + 4;
	newlabel = calloc(newlabellen, sizeof(char));
	if (newlabel == NULL)
		err(2, NULL);

	snprintf(newlabel, newlabellen, "%s\t%s.%.9d %s",
	    oldlabel, buf, nsec, end);

	return newlabel;
}
144
145 int
diffreg_new(char * file1,char * file2,int flags,int capsicum)146 diffreg_new(char *file1, char *file2, int flags, int capsicum)
147 {
148 char *str1, *str2;
149 FILE *f1, *f2;
150 struct pr *pr = NULL;
151 struct stat st1, st2;
152 struct diff_input_info info;
153 struct diff_data left = {}, right = {};
154 struct diff_result *result = NULL;
155 bool force_text, have_binary;
156 int rc, atomizer_flags, rflags, diff_flags = 0;
157 int context_lines = diff_context;
158 const struct diff_config *cfg;
159 enum diffreg_algo algo;
160 cap_rights_t rights_ro;
161 int ret;
162
163 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
164
165 switch (algo) {
166 default:
167 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
168 cfg = &diff_config_myers_then_myers_divide;
169 break;
170 case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
171 cfg = &diff_config_myers_then_patience;
172 break;
173 case DIFFREG_ALGO_PATIENCE:
174 cfg = &diff_config_patience;
175 break;
176 case DIFFREG_ALGO_NONE:
177 cfg = &diff_config_no_algo;
178 break;
179 }
180
181 f1 = openfile(file1, &str1, &st1);
182 f2 = openfile(file2, &str2, &st2);
183
184 if (flags & D_PAGINATION)
185 pr = start_pr(file1, file2);
186
187 if (capsicum) {
188 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
189 if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
190 err(2, "unable to limit rights on: %s", file1);
191 if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
192 err(2, "unable to limit rights on: %s", file2);
193 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
194 /* stdin has already been limited */
195 if (caph_limit_stderr() == -1)
196 err(2, "unable to limit stderr");
197 if (caph_limit_stdout() == -1)
198 err(2, "unable to limit stdout");
199 } else if (caph_limit_stdio() == -1)
200 err(2, "unable to limit stdio");
201 caph_cache_catpages();
202 caph_cache_tzdata();
203 if (caph_enter() < 0)
204 err(2, "unable to enter capability mode");
205 }
206 /*
207 * If we have been given a label use that for the paths, if not format
208 * the path with the files modification time.
209 */
210 info.flags = 0;
211 info.left_path = (label[0] != NULL) ?
212 label[0] : format_label(file1, &stb1);
213 info.right_path = (label[1] != NULL) ?
214 label[1] : format_label(file2, &stb2);
215
216 if (flags & D_FORCEASCII)
217 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
218 if (flags & D_IGNOREBLANKS)
219 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
220 if (flags & D_PROTOTYPE)
221 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
222
223 ret = diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size,
224 diff_flags);
225 if (ret != DIFF_RC_OK) {
226 warnc(ret, "%s", file1);
227 rc = D_ERROR;
228 status |= 2;
229 goto done;
230 }
231 ret = diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size,
232 diff_flags);
233 if (ret != DIFF_RC_OK) {
234 warnc(ret, "%s", file2);
235 rc = D_ERROR;
236 status |= 2;
237 goto done;
238 }
239
240 result = diff_main(cfg, &left, &right);
241 if (result->rc != DIFF_RC_OK) {
242 rc = D_ERROR;
243 status |= 2;
244 goto done;
245 }
246 /*
247 * If there wasn't an error, but we don't have any printable chunks
248 * then the files must match.
249 */
250 if (!diff_result_contains_printable_chunks(result)) {
251 rc = D_SAME;
252 goto done;
253 }
254
255 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
256 rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
257 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
258 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
259
260 if (have_binary && !force_text) {
261 rc = D_BINARY;
262 status |= 1;
263 goto done;
264 }
265
266 if (color)
267 diff_output_set_colors(color, del_code, add_code);
268 if (diff_format == D_NORMAL) {
269 rc = diff_output_plain(NULL, stdout, &info, result, false);
270 } else if (diff_format == D_EDIT) {
271 rc = diff_output_edscript(NULL, stdout, &info, result);
272 } else {
273 rc = diff_output_unidiff(NULL, stdout, &info, result,
274 context_lines);
275 }
276 if (rc != DIFF_RC_OK) {
277 rc = D_ERROR;
278 status |= 2;
279 } else {
280 rc = D_DIFFER;
281 status |= 1;
282 }
283 done:
284 if (pr != NULL)
285 stop_pr(pr);
286 diff_result_free(result);
287 diff_data_free(&left);
288 diff_data_free(&right);
289 #ifndef DIFF_NO_MMAP
290 if (str1)
291 munmap(str1, st1.st_size);
292 if (str2)
293 munmap(str2, st2.st_size);
294 #endif
295 fclose(f1);
296 fclose(f2);
297
298 return rc;
299 }
300
/*
 * Open an input for diffreg_new().
 *
 * path	file to open for reading; "-" selects stdin
 * p	receives the address of a private read-only mapping of the whole
 *	file, or NULL when no mapping is available (mmap() failed or
 *	DIFF_NO_MMAP is defined) and the caller has to use stream I/O
 * st	receives the fstat() data of the opened stream
 *
 * Returns the stream.  Failure to open or stat the file is fatal and exits
 * with status 2 through err().
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;
	int fd;

	fp = (strcmp(path, "-") != 0) ? fopen(path, "r") : stdin;
	if (fp == NULL)
		err(2, "%s", path);

	fd = fileno(fp);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

	/* Default to file I/O; overridden below if the mapping works. */
	*p = NULL;
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return fp;
}
325
326 bool
can_libdiff(int flags)327 can_libdiff(int flags)
328 {
329 /* libdiff's atomizer can only deal with files */
330 if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode))
331 return false;
332
333 /* Is this one of the supported input/output modes for diffreg_new? */
334 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
335 ignore_pats == NULL && (
336 diff_format == D_NORMAL ||
337 #if 0
338 diff_format == D_EDIT ||
339 #endif
340 diff_format == D_UNIFIED) &&
341 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
342 return true;
343 }
344
345 /* Fallback to using stone. */
346 return false;
347 }
348