1 /* 2 * Copyright (c) 2018 Martin Pieuchot 3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/capsicum.h> 20 #ifndef DIFF_NO_MMAP 21 #include <sys/mman.h> 22 #endif 23 #include <sys/stat.h> 24 25 #include <capsicum_helpers.h> 26 #include <err.h> 27 #include <fcntl.h> 28 #include <stdbool.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 36 #include "pr.h" 37 #include "diff.h" 38 #include <arraylist.h> 39 #include <diff_main.h> 40 #include <diff_output.h> 41 42 const char *format_label(const char *, struct stat *); 43 44 enum diffreg_algo { 45 DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0, 46 DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1, 47 DIFFREG_ALGO_PATIENCE = 2, 48 DIFFREG_ALGO_NONE = 3, 49 }; 50 51 int diffreg_new(char *, char *, int, int); 52 FILE * openfile(const char *, char **, struct stat *); 53 54 static const struct diff_algo_config myers_then_patience; 55 static const struct diff_algo_config myers_then_myers_divide; 56 static const struct diff_algo_config patience; 57 static const struct diff_algo_config myers_divide; 58 59 static const struct diff_algo_config myers_then_patience = { 60 .impl = diff_algo_myers, 61 .permitted_state_size = 1024 * 1024 * sizeof(int), 62 .fallback_algo = &patience, 63 }; 64 65 static const struct diff_algo_config myers_then_myers_divide = 66 (struct diff_algo_config){ 67 .impl = diff_algo_myers, 68 .permitted_state_size = 1024 * 1024 * sizeof(int), 69 .fallback_algo = &myers_divide, 70 }; 71 72 static const struct diff_algo_config patience = { 73 .impl = diff_algo_patience, 74 /* After subdivision, do Patience again: */ 75 .inner_algo = &patience, 76 /* If subdivision failed, do Myers Divide et Impera: */ 77 .fallback_algo = &myers_then_myers_divide, 78 }; 79 80 static const struct diff_algo_config myers_divide = { 81 .impl = diff_algo_myers_divide, 82 /* When division succeeded, start from the top: */ 83 .inner_algo = &myers_then_myers_divide, 84 /* (fallback_algo = NULL implies diff_algo_none). */ 85 }; 86 87 static const struct diff_algo_config none = { 88 .impl = diff_algo_none, 89 }; 90 91 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 92 * do a Myers-divide. */ 93 static const struct diff_config diff_config_myers_then_myers_divide = { 94 .atomize_func = diff_atomize_text_by_line, 95 .algo = &myers_then_myers_divide, 96 }; 97 98 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 99 * do a Patience. */ 100 static const struct diff_config diff_config_myers_then_patience = { 101 .atomize_func = diff_atomize_text_by_line, 102 .algo = &myers_then_patience, 103 }; 104 105 /* Directly force Patience as a first divider of the source file. */ 106 static const struct diff_config diff_config_patience = { 107 .atomize_func = diff_atomize_text_by_line, 108 .algo = &patience, 109 }; 110 111 /* Directly force Patience as a first divider of the source file. */ 112 static const struct diff_config diff_config_none = { 113 .atomize_func = diff_atomize_text_by_line, 114 .algo = &none, 115 }; 116 117 const char * 118 format_label(const char *oldlabel, struct stat *stb) 119 { 120 const char *time_format = "%Y-%m-%d %H:%M:%S"; 121 char *newlabel; 122 char buf[256]; 123 char end[10]; 124 struct tm tm, *tm_ptr; 125 int nsec = stb->st_mtim.tv_nsec; 126 size_t newlabellen, timelen, endlen; 127 tm_ptr = localtime_r(&stb->st_mtime, &tm); 128 129 timelen = strftime(buf, 256, time_format, tm_ptr); 130 endlen = strftime(end, 10, "%z", tm_ptr); 131 132 /* 133 * The new label is the length of the time, old label, timezone, 134 * 9 characters for nanoseconds, and 4 characters for a period 135 * and for formatting. 136 */ 137 newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4; 138 newlabel = calloc(newlabellen, sizeof(char)); 139 140 snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n", 141 oldlabel, buf, nsec, end); 142 143 return newlabel; 144 } 145 146 int 147 diffreg_new(char *file1, char *file2, int flags, int capsicum) 148 { 149 char *str1, *str2; 150 FILE *f1, *f2; 151 struct pr *pr = NULL; 152 struct stat st1, st2; 153 struct diff_input_info info; 154 struct diff_data left = {}, right = {}; 155 struct diff_result *result = NULL; 156 bool force_text, have_binary; 157 int rc, atomizer_flags, rflags, diff_flags = 0; 158 int context_lines = diff_context; 159 const struct diff_config *cfg; 160 enum diffreg_algo algo; 161 cap_rights_t rights_ro; 162 int ret; 163 164 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE; 165 166 switch (algo) { 167 default: 168 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE: 169 cfg = &diff_config_myers_then_myers_divide; 170 break; 171 case DIFFREG_ALGO_MYERS_THEN_PATIENCE: 172 cfg = &diff_config_myers_then_patience; 173 break; 174 case DIFFREG_ALGO_PATIENCE: 175 cfg = &diff_config_patience; 176 break; 177 case DIFFREG_ALGO_NONE: 178 cfg = &diff_config_none; 179 break; 180 } 181 182 f1 = openfile(file1, &str1, &st1); 183 f2 = openfile(file2, &str2, &st2); 184 185 if (flags & D_PAGINATION) 186 pr = start_pr(file1, file2); 187 188 if (capsicum) { 189 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 190 if (caph_rights_limit(fileno(f1), &rights_ro) < 0) 191 err(2, "unable to limit rights on: %s", file1); 192 if (caph_rights_limit(fileno(f2), &rights_ro) < 0) 193 err(2, "unable to limit rights on: %s", file2); 194 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 195 /* stdin has already been limited */ 196 if (caph_limit_stderr() == -1) 197 err(2, "unable to limit stderr"); 198 if (caph_limit_stdout() == -1) 199 err(2, "unable to limit stdout"); 200 } else if (caph_limit_stdio() == -1) 201 err(2, "unable to limit stdio"); 202 caph_cache_catpages(); 203 caph_cache_tzdata(); 204 if (caph_enter() < 0) 205 err(2, "unable to enter capability mode"); 206 } 207 /* 208 * If we have been given a label use that for the paths, if not format 209 * the path with the files modification time. 210 */ 211 info.flags = 0; 212 info.left_path = (label[0] != NULL) ? 213 label[0] : format_label(file1, &stb1); 214 info.right_path = (label[1] != NULL) ? 215 label[1] : format_label(file2, &stb2); 216 217 if (flags & D_FORCEASCII) 218 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA; 219 if (flags & D_IGNOREBLANKS) 220 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE; 221 if (flags & D_PROTOTYPE) 222 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; 223 224 ret = diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, 225 diff_flags); 226 if (ret != DIFF_RC_OK) { 227 warnc(ret, "%s", file1); 228 rc = D_ERROR; 229 status |= 2; 230 goto done; 231 } 232 ret = diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, 233 diff_flags); 234 if (ret != DIFF_RC_OK) { 235 warnc(ret, "%s", file2); 236 rc = D_ERROR; 237 status |= 2; 238 goto done; 239 } 240 241 result = diff_main(cfg, &left, &right); 242 if (result->rc != DIFF_RC_OK) { 243 rc = D_ERROR; 244 status |= 2; 245 goto done; 246 } 247 /* 248 * If there wasn't an error, but we don't have any printable chunks 249 * then the files must match. 250 */ 251 if (!diff_result_contains_printable_chunks(result)) { 252 rc = D_SAME; 253 goto done; 254 } 255 256 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags); 257 rflags = (result->left->root->diff_flags | result->right->root->diff_flags); 258 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA); 259 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA); 260 261 if (have_binary && !force_text) { 262 rc = D_BINARY; 263 status |= 1; 264 goto done; 265 } 266 267 if (color) 268 diff_output_set_colors(color, del_code, add_code); 269 if (diff_format == D_NORMAL) { 270 rc = diff_output_plain(NULL, stdout, &info, result, false); 271 } else if (diff_format == D_EDIT) { 272 rc = diff_output_edscript(NULL, stdout, &info, result); 273 } else { 274 rc = diff_output_unidiff(NULL, stdout, &info, result, 275 context_lines); 276 } 277 if (rc != DIFF_RC_OK) { 278 rc = D_ERROR; 279 status |= 2; 280 } else { 281 rc = D_DIFFER; 282 status |= 1; 283 } 284 done: 285 if (pr != NULL) 286 stop_pr(pr); 287 diff_result_free(result); 288 diff_data_free(&left); 289 diff_data_free(&right); 290 #ifndef DIFF_NO_MMAP 291 if (str1) 292 munmap(str1, st1.st_size); 293 if (str2) 294 munmap(str2, st2.st_size); 295 #endif 296 fclose(f1); 297 fclose(f2); 298 299 return rc; 300 } 301 302 FILE * 303 openfile(const char *path, char **p, struct stat *st) 304 { 305 FILE *f = NULL; 306 307 if (strcmp(path, "-") == 0) 308 f = stdin; 309 else 310 f = fopen(path, "r"); 311 312 if (f == NULL) 313 err(2, "%s", path); 314 315 if (fstat(fileno(f), st) == -1) 316 err(2, "%s", path); 317 318 #ifndef DIFF_NO_MMAP 319 *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); 320 if (*p == MAP_FAILED) 321 #endif 322 *p = NULL; /* fall back on file I/O */ 323 324 return f; 325 } 326 327 bool 328 can_libdiff(int flags) 329 { 330 /* libdiff's atomizer can only deal with files */ 331 if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode)) 332 return false; 333 334 /* Is this one of the supported input/output modes for diffreg_new? */ 335 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) && 336 ignore_pats == NULL && ( 337 diff_format == D_NORMAL || 338 #if 0 339 diff_format == D_EDIT || 340 #endif 341 diff_format == D_UNIFIED) && 342 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) { 343 return true; 344 } 345 346 /* Fallback to using stone. */ 347 return false; 348 } 349