1 /*
2 * Copyright (c) 2018 Martin Pieuchot
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/types.h>
19 #include <sys/capsicum.h>
20 #ifndef DIFF_NO_MMAP
21 #include <sys/mman.h>
22 #endif
23 #include <sys/stat.h>
24
25 #include <capsicum_helpers.h>
26 #include <err.h>
27 #include <fcntl.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35
36 #include "diff.h"
37 #include <arraylist.h>
38 #include <diff_main.h>
39 #include <diff_output.h>
40
/* Builds a "name<TAB>mtime zone" label; the result is heap-allocated. */
const char	*format_label(const char *oldlabel, struct stat *stb);
42
/*
 * Selector for the libdiff configuration used by diffreg_new().  Each value
 * corresponds to one of the static diff_config_* objects defined below.
 */
enum diffreg_algo {
	/* Selects diff_config_myers_then_myers_divide (also the default). */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	/* Selects diff_config_myers_then_patience. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
	/* Selects diff_config_patience. */
	DIFFREG_ALGO_PATIENCE = 2,
	/* Selects diff_config_no_algo. */
	DIFFREG_ALGO_NONE = 3,
};
49
/* Diff file1 against file2 with libdiff; returns one of the D_* codes. */
int	 diffreg_new(char *file1, char *file2, int flags, int capsicum);
/* Open path ("-" is stdin), stat it into st, optionally mmap it into *p. */
FILE	*openfile(const char *path, char **p, struct stat *st);
52
/*
 * Tentative definitions: the algorithm configurations below point at each
 * other (and, for patience, at themselves) through .inner_algo and
 * .fallback_algo, so every name must be declared before the first
 * initializer that takes its address.
 */
static const struct diff_algo_config myers_then_patience;
static const struct diff_algo_config myers_then_myers_divide;
static const struct diff_algo_config patience;
static const struct diff_algo_config myers_divide;
57
58 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
59 .impl = diff_algo_myers,
60 .permitted_state_size = 1024 * 1024 * sizeof(int),
61 .fallback_algo = &patience,
62 };
63
64 static const struct diff_algo_config myers_then_myers_divide =
65 (struct diff_algo_config){
66 .impl = diff_algo_myers,
67 .permitted_state_size = 1024 * 1024 * sizeof(int),
68 .fallback_algo = &myers_divide,
69 };
70
71 static const struct diff_algo_config patience = (struct diff_algo_config){
72 .impl = diff_algo_patience,
73 /* After subdivision, do Patience again: */
74 .inner_algo = &patience,
75 /* If subdivision failed, do Myers Divide et Impera: */
76 .fallback_algo = &myers_then_myers_divide,
77 };
78
79 static const struct diff_algo_config myers_divide = (struct diff_algo_config){
80 .impl = diff_algo_myers_divide,
81 /* When division succeeded, start from the top: */
82 .inner_algo = &myers_then_myers_divide,
83 /* (fallback_algo = NULL implies diff_algo_none). */
84 };
85
86 static const struct diff_algo_config no_algo = (struct diff_algo_config){
87 .impl = diff_algo_none,
88 };
89
/*
 * If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Myers-divide.  Input is atomized with diff_atomize_text_by_line.
 */
static const struct diff_config diff_config_myers_then_myers_divide = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_myers_divide,
};
96
/*
 * If the state for a forward-Myers is small enough, use Myers, otherwise first
 * do a Patience.  Input is atomized with diff_atomize_text_by_line.
 */
static const struct diff_config diff_config_myers_then_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &myers_then_patience,
};
103
/*
 * Directly force Patience as a first divider of the source file.
 * Input is atomized with diff_atomize_text_by_line.
 */
static const struct diff_config diff_config_patience = {
	.atomize_func = diff_atomize_text_by_line,
	.algo = &patience,
};
109
/*
 * Configuration without any diff algorithm: only the atomizer is set and
 * .algo is left NULL.
 * NOTE(review): presumably libdiff treats a NULL .algo like diff_algo_none;
 * confirm against diff_main() before relying on it.
 */
static const struct diff_config diff_config_no_algo = {
	.atomize_func = diff_atomize_text_by_line,
};
114
/*
 * Build a header label of the form
 *
 *	<oldlabel> TAB <YYYY-mm-dd HH:MM:SS> . <9-digit nanoseconds> SPACE <zone>
 *
 * from a file name and the modification time recorded in stb.
 *
 * oldlabel: text placed in front of the timestamp (must not be NULL).
 * stb:      stat data whose st_mtime / st_mtim.tv_nsec are formatted.
 *
 * Returns a NUL-terminated string allocated with calloc(); the caller owns
 * it and may release it with free().  Exits via err(2, ...) if the
 * allocation fails.  The label never ends in a newline.
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	const char *time_format = "%Y-%m-%d %H:%M:%S";
	char *newlabel;
	char buf[256];
	char end[10];
	struct tm tm, *tm_ptr;
	int nsec = stb->st_mtim.tv_nsec;
	size_t newlabellen, timelen = 0, endlen = 0;

	/* localtime_r() may fail; never hand a NULL struct tm to strftime(). */
	tm_ptr = localtime_r(&stb->st_mtime, &tm);
	if (tm_ptr != NULL) {
		timelen = strftime(buf, sizeof(buf), time_format, tm_ptr);
		endlen = strftime(end, sizeof(end), "%z", tm_ptr);
	}
	/*
	 * A return of 0 can mean the result did not fit, in which case the
	 * buffer contents are unspecified; make both usable as empty strings.
	 */
	if (timelen == 0)
		buf[0] = '\0';
	if (endlen == 0)
		end[0] = '\0';

	/*
	 * The new label is the length of the time, old label, timezone,
	 * 9 characters for nanoseconds, and 4 characters for the tab, the
	 * period, the space and the terminating NUL.
	 */
	newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4;
	newlabel = calloc(newlabellen, sizeof(char));
	if (newlabel == NULL)
		err(2, "calloc");

	/*
	 * No trailing newline in the format: the buffer never had room for
	 * one, so it used to be dropped by snprintf() truncation anyway.
	 */
	snprintf(newlabel, newlabellen, "%s\t%s.%.9d %s",
	    oldlabel, buf, nsec, end);

	return newlabel;
}
143
144 int
diffreg_new(char * file1,char * file2,int flags,int capsicum)145 diffreg_new(char *file1, char *file2, int flags, int capsicum)
146 {
147 char *str1, *str2;
148 FILE *f1, *f2;
149 struct stat st1, st2;
150 struct diff_input_info info;
151 struct diff_data left = {}, right = {};
152 struct diff_result *result = NULL;
153 bool force_text, have_binary;
154 int rc, atomizer_flags, rflags, diff_flags = 0;
155 int context_lines = diff_context;
156 const struct diff_config *cfg;
157 enum diffreg_algo algo;
158 cap_rights_t rights_ro;
159
160 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
161
162 switch (algo) {
163 default:
164 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
165 cfg = &diff_config_myers_then_myers_divide;
166 break;
167 case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
168 cfg = &diff_config_myers_then_patience;
169 break;
170 case DIFFREG_ALGO_PATIENCE:
171 cfg = &diff_config_patience;
172 break;
173 case DIFFREG_ALGO_NONE:
174 cfg = &diff_config_no_algo;
175 break;
176 }
177
178 f1 = openfile(file1, &str1, &st1);
179 f2 = openfile(file2, &str2, &st2);
180
181 if (capsicum) {
182 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
183 if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
184 err(2, "unable to limit rights on: %s", file1);
185 if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
186 err(2, "unable to limit rights on: %s", file2);
187 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
188 /* stdin has already been limited */
189 if (caph_limit_stderr() == -1)
190 err(2, "unable to limit stderr");
191 if (caph_limit_stdout() == -1)
192 err(2, "unable to limit stdout");
193 } else if (caph_limit_stdio() == -1)
194 err(2, "unable to limit stdio");
195 caph_cache_catpages();
196 caph_cache_tzdata();
197 if (caph_enter() < 0)
198 err(2, "unable to enter capability mode");
199 }
200 /*
201 * If we have been given a label use that for the paths, if not format
202 * the path with the files modification time.
203 */
204 info.flags = 0;
205 info.left_path = (label[0] != NULL) ?
206 label[0] : format_label(file1, &stb1);
207 info.right_path = (label[1] != NULL) ?
208 label[1] : format_label(file2, &stb2);
209
210 if (flags & D_FORCEASCII)
211 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
212 if (flags & D_IGNOREBLANKS)
213 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
214 if (flags & D_PROTOTYPE)
215 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
216
217 if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) {
218 rc = D_ERROR;
219 goto done;
220 }
221 if (left.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
222 warnx("%s truncated", file1);
223 if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) {
224 rc = D_ERROR;
225 goto done;
226 }
227 if (right.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
228 warnx("%s truncated", file2);
229
230 result = diff_main(cfg, &left, &right);
231 if (result->rc != DIFF_RC_OK) {
232 rc = D_ERROR;
233 status |= 2;
234 goto done;
235 }
236 /*
237 * If there wasn't an error, but we don't have any printable chunks
238 * then the files must match.
239 */
240 if (!diff_result_contains_printable_chunks(result)) {
241 rc = D_SAME;
242 goto done;
243 }
244
245 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
246 rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
247 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
248 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
249
250 if (have_binary && !force_text) {
251 rc = D_BINARY;
252 status |= 1;
253 goto done;
254 }
255
256 if (color)
257 diff_output_set_colors(color, del_code, add_code);
258 if (diff_format == D_NORMAL) {
259 rc = diff_output_plain(NULL, stdout, &info, result, false);
260 } else if (diff_format == D_EDIT) {
261 rc = diff_output_edscript(NULL, stdout, &info, result);
262 } else {
263 rc = diff_output_unidiff(NULL, stdout, &info, result,
264 context_lines);
265 }
266 if (rc != DIFF_RC_OK) {
267 rc = D_ERROR;
268 status |= 2;
269 } else {
270 rc = D_DIFFER;
271 status |= 1;
272 }
273 done:
274 diff_result_free(result);
275 diff_data_free(&left);
276 diff_data_free(&right);
277 #ifndef DIFF_NO_MMAP
278 if (str1)
279 munmap(str1, st1.st_size);
280 if (str2)
281 munmap(str2, st2.st_size);
282 #endif
283 fclose(f1);
284 fclose(f2);
285
286 return rc;
287 }
288
/*
 * Open an input for diffing.
 *
 * path: file to open for reading; "-" selects stdin.
 * p:    receives a read-only private mapping of the whole file, or NULL
 *       when mapping is compiled out (DIFF_NO_MMAP) or mmap() fails, in
 *       which case callers fall back on file I/O.
 * st:   receives the fstat() result for the opened stream.
 *
 * Returns the stream.  Exits via err(2, ...) if the file cannot be opened
 * or stat'ed.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;

	fp = (strcmp(path, "-") != 0) ? fopen(path, "r") : stdin;
	if (fp == NULL)
		err(2, "%s", path);

	if (fstat(fileno(fp), st) == -1)
		err(2, "%s", path);

	/* Default to file I/O; only a successful mapping overrides this. */
	*p = NULL;
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE,
		    fileno(fp), 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return fp;
}
313
314 bool
can_libdiff(int flags)315 can_libdiff(int flags)
316 {
317 /* We can't use fifos with libdiff yet */
318 if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode))
319 return false;
320
321 /* Is this one of the supported input/output modes for diffreg_new? */
322 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
323 ignore_pats == NULL && (
324 diff_format == D_NORMAL ||
325 #if 0
326 diff_format == D_EDIT ||
327 #endif
328 diff_format == D_UNIFIED) &&
329 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
330 return true;
331 }
332
333 /* Fallback to using stone. */
334 return false;
335 }
336