1 /* 2 * Copyright (c) 2018 Martin Pieuchot 3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/capsicum.h> 20 #ifndef DIFF_NO_MMAP 21 #include <sys/mman.h> 22 #endif 23 #include <sys/stat.h> 24 25 #include <capsicum_helpers.h> 26 #include <err.h> 27 #include <fcntl.h> 28 #include <stdbool.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <time.h> 34 #include <unistd.h> 35 36 #include "diff.h" 37 #include <arraylist.h> 38 #include <diff_main.h> 39 #include <diff_output.h> 40 41 const char *format_label(const char *, struct stat *); 42 43 enum diffreg_algo { 44 DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0, 45 DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1, 46 DIFFREG_ALGO_PATIENCE = 2, 47 DIFFREG_ALGO_NONE = 3, 48 }; 49 50 int diffreg_new(char *, char *, int, int); 51 FILE * openfile(const char *, char **, struct stat *); 52 53 static const struct diff_algo_config myers_then_patience; 54 static const struct diff_algo_config myers_then_myers_divide; 55 static const struct diff_algo_config patience; 56 static const struct diff_algo_config myers_divide; 57 58 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){ 59 .impl = diff_algo_myers, 60 .permitted_state_size = 1024 * 1024 * sizeof(int), 61 .fallback_algo = &patience, 62 }; 63 64 static const struct diff_algo_config myers_then_myers_divide = 65 (struct diff_algo_config){ 66 .impl = diff_algo_myers, 67 .permitted_state_size = 1024 * 1024 * sizeof(int), 68 .fallback_algo = &myers_divide, 69 }; 70 71 static const struct diff_algo_config patience = (struct diff_algo_config){ 72 .impl = diff_algo_patience, 73 /* After subdivision, do Patience again: */ 74 .inner_algo = &patience, 75 /* If subdivision failed, do Myers Divide et Impera: */ 76 .fallback_algo = &myers_then_myers_divide, 77 }; 78 79 static const struct diff_algo_config myers_divide = (struct diff_algo_config){ 80 .impl = diff_algo_myers_divide, 81 /* When division succeeded, start from the top: */ 82 .inner_algo = &myers_then_myers_divide, 83 /* (fallback_algo = NULL implies diff_algo_none). */ 84 }; 85 86 static const struct diff_algo_config no_algo = (struct diff_algo_config){ 87 .impl = diff_algo_none, 88 }; 89 90 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 91 * do a Myers-divide. */ 92 static const struct diff_config diff_config_myers_then_myers_divide = { 93 .atomize_func = diff_atomize_text_by_line, 94 .algo = &myers_then_myers_divide, 95 }; 96 97 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 98 * do a Patience. */ 99 static const struct diff_config diff_config_myers_then_patience = { 100 .atomize_func = diff_atomize_text_by_line, 101 .algo = &myers_then_patience, 102 }; 103 104 /* Directly force Patience as a first divider of the source file. */ 105 static const struct diff_config diff_config_patience = { 106 .atomize_func = diff_atomize_text_by_line, 107 .algo = &patience, 108 }; 109 110 /* Directly force Patience as a first divider of the source file. */ 111 static const struct diff_config diff_config_no_algo = { 112 .atomize_func = diff_atomize_text_by_line, 113 }; 114 115 const char * 116 format_label(const char *oldlabel, struct stat *stb) 117 { 118 const char *time_format = "%Y-%m-%d %H:%M:%S"; 119 char *newlabel; 120 char buf[256]; 121 char end[10]; 122 struct tm tm, *tm_ptr; 123 int nsec = stb->st_mtim.tv_nsec; 124 size_t newlabellen, timelen, endlen; 125 tm_ptr = localtime_r(&stb->st_mtime, &tm); 126 127 timelen = strftime(buf, 256, time_format, tm_ptr); 128 endlen = strftime(end, 10, "%z", tm_ptr); 129 130 /* 131 * The new label is the length of the time, old label, timezone, 132 * 9 characters for nanoseconds, and 4 characters for a period 133 * and for formatting. 134 */ 135 newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4; 136 newlabel = calloc(newlabellen, sizeof(char)); 137 138 snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n", 139 oldlabel, buf, nsec, end); 140 141 return newlabel; 142 } 143 144 int 145 diffreg_new(char *file1, char *file2, int flags, int capsicum) 146 { 147 char *str1, *str2; 148 FILE *f1, *f2; 149 struct stat st1, st2; 150 struct diff_input_info info; 151 struct diff_data left = {}, right = {}; 152 struct diff_result *result = NULL; 153 bool force_text, have_binary; 154 int rc, atomizer_flags, rflags, diff_flags = 0; 155 int context_lines = diff_context; 156 const struct diff_config *cfg; 157 enum diffreg_algo algo; 158 cap_rights_t rights_ro; 159 160 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE; 161 162 switch (algo) { 163 default: 164 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE: 165 cfg = &diff_config_myers_then_myers_divide; 166 break; 167 case DIFFREG_ALGO_MYERS_THEN_PATIENCE: 168 cfg = &diff_config_myers_then_patience; 169 break; 170 case DIFFREG_ALGO_PATIENCE: 171 cfg = &diff_config_patience; 172 break; 173 case DIFFREG_ALGO_NONE: 174 cfg = &diff_config_no_algo; 175 break; 176 } 177 178 f1 = openfile(file1, &str1, &st1); 179 f2 = openfile(file2, &str2, &st2); 180 181 if (capsicum) { 182 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 183 if (caph_rights_limit(fileno(f1), &rights_ro) < 0) 184 err(2, "unable to limit rights on: %s", file1); 185 if (caph_rights_limit(fileno(f2), &rights_ro) < 0) 186 err(2, "unable to limit rights on: %s", file2); 187 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 188 /* stdin has already been limited */ 189 if (caph_limit_stderr() == -1) 190 err(2, "unable to limit stderr"); 191 if (caph_limit_stdout() == -1) 192 err(2, "unable to limit stdout"); 193 } else if (caph_limit_stdio() == -1) 194 err(2, "unable to limit stdio"); 195 caph_cache_catpages(); 196 caph_cache_tzdata(); 197 if (caph_enter() < 0) 198 err(2, "unable to enter capability mode"); 199 } 200 /* 201 * If we have been given a label use that for the paths, if not format 202 * the path with the files modification time. 203 */ 204 info.flags = 0; 205 info.left_path = (label[0] != NULL) ? 206 label[0] : format_label(file1, &stb1); 207 info.right_path = (label[1] != NULL) ? 208 label[1] : format_label(file2, &stb2); 209 210 if (flags & D_FORCEASCII) 211 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA; 212 if (flags & D_IGNOREBLANKS) 213 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE; 214 if (flags & D_PROTOTYPE) 215 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; 216 217 if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) { 218 rc = D_ERROR; 219 goto done; 220 } 221 if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) { 222 rc = D_ERROR; 223 goto done; 224 } 225 226 result = diff_main(cfg, &left, &right); 227 if (result->rc != DIFF_RC_OK) { 228 rc = D_ERROR; 229 status |= 2; 230 goto done; 231 } 232 /* 233 * If there wasn't an error, but we don't have any printable chunks 234 * then the files must match. 235 */ 236 if (!diff_result_contains_printable_chunks(result)) { 237 rc = D_SAME; 238 goto done; 239 } 240 241 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags); 242 rflags = (result->left->root->diff_flags | result->right->root->diff_flags); 243 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA); 244 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA); 245 246 if (have_binary && !force_text) { 247 rc = D_BINARY; 248 status |= 1; 249 goto done; 250 } 251 252 if (diff_format == D_NORMAL) { 253 rc = diff_output_plain(NULL, stdout, &info, result, false); 254 } else if (diff_format == D_EDIT) { 255 rc = diff_output_edscript(NULL, stdout, &info, result); 256 } else { 257 rc = diff_output_unidiff(NULL, stdout, &info, result, 258 context_lines); 259 } 260 if (rc != DIFF_RC_OK) { 261 rc = D_ERROR; 262 status |= 2; 263 } else { 264 rc = D_DIFFER; 265 status |= 1; 266 } 267 done: 268 diff_result_free(result); 269 diff_data_free(&left); 270 diff_data_free(&right); 271 #ifndef DIFF_NO_MMAP 272 if (str1) 273 munmap(str1, st1.st_size); 274 if (str2) 275 munmap(str2, st2.st_size); 276 #endif 277 fclose(f1); 278 fclose(f2); 279 280 return rc; 281 } 282 283 FILE * 284 openfile(const char *path, char **p, struct stat *st) 285 { 286 FILE *f = NULL; 287 288 if (strcmp(path, "-") == 0) 289 f = stdin; 290 else 291 f = fopen(path, "r"); 292 293 if (f == NULL) 294 err(2, "%s", path); 295 296 if (fstat(fileno(f), st) == -1) 297 err(2, "%s", path); 298 299 #ifndef DIFF_NO_MMAP 300 *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); 301 if (*p == MAP_FAILED) 302 #endif 303 *p = NULL; /* fall back on file I/O */ 304 305 return f; 306 } 307 308 bool 309 can_libdiff(int flags) 310 { 311 /* We can't use fifos with libdiff yet */ 312 if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode)) 313 return false; 314 315 /* Is this one of the supported input/output modes for diffreg_new? */ 316 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) && 317 ignore_pats == NULL && ( 318 diff_format == D_NORMAL || 319 #if 0 320 diff_format == D_EDIT || 321 #endif 322 diff_format == D_UNIFIED) && 323 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) { 324 return true; 325 } 326 327 /* Fallback to using stone. */ 328 return false; 329 } 330