1 /* 2 * Copyright (c) 2018 Martin Pieuchot 3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/capsicum.h> 19 #include <sys/mman.h> 20 #include <sys/stat.h> 21 #include <sys/types.h> 22 23 #include <capsicum_helpers.h> 24 #include <err.h> 25 #include <fcntl.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <stdbool.h> 30 #include <time.h> 31 #include <string.h> 32 #include <unistd.h> 33 34 #include "diff.h" 35 #include <arraylist.h> 36 #include <diff_main.h> 37 #include <diff_output.h> 38 39 const char *format_label(const char *, struct stat *); 40 41 enum diffreg_algo { 42 DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0, 43 DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1, 44 DIFFREG_ALGO_PATIENCE = 2, 45 DIFFREG_ALGO_NONE = 3, 46 }; 47 48 int diffreg_new(char *, char *, int, int); 49 FILE * openfile(const char *, char **, struct stat *); 50 51 static const struct diff_algo_config myers_then_patience; 52 static const struct diff_algo_config myers_then_myers_divide; 53 static const struct diff_algo_config patience; 54 static const struct diff_algo_config myers_divide; 55 56 static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){ 57 .impl = diff_algo_myers, 58 .permitted_state_size = 1024 * 1024 * sizeof(int), 59 .fallback_algo = &patience, 60 }; 61 62 static const struct diff_algo_config myers_then_myers_divide = 63 (struct diff_algo_config){ 64 .impl = diff_algo_myers, 65 .permitted_state_size = 1024 * 1024 * sizeof(int), 66 .fallback_algo = &myers_divide, 67 }; 68 69 static const struct diff_algo_config patience = (struct diff_algo_config){ 70 .impl = diff_algo_patience, 71 /* After subdivision, do Patience again: */ 72 .inner_algo = &patience, 73 /* If subdivision failed, do Myers Divide et Impera: */ 74 .fallback_algo = &myers_then_myers_divide, 75 }; 76 77 static const struct diff_algo_config myers_divide = (struct diff_algo_config){ 78 .impl = diff_algo_myers_divide, 79 /* When division succeeded, start from the top: */ 80 .inner_algo = &myers_then_myers_divide, 81 /* (fallback_algo = NULL implies diff_algo_none). */ 82 }; 83 84 static const struct diff_algo_config no_algo = (struct diff_algo_config){ 85 .impl = diff_algo_none, 86 }; 87 88 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 89 * do a Myers-divide. */ 90 static const struct diff_config diff_config_myers_then_myers_divide = { 91 .atomize_func = diff_atomize_text_by_line, 92 .algo = &myers_then_myers_divide, 93 }; 94 95 /* If the state for a forward-Myers is small enough, use Myers, otherwise first 96 * do a Patience. */ 97 static const struct diff_config diff_config_myers_then_patience = { 98 .atomize_func = diff_atomize_text_by_line, 99 .algo = &myers_then_patience, 100 }; 101 102 /* Directly force Patience as a first divider of the source file. */ 103 static const struct diff_config diff_config_patience = { 104 .atomize_func = diff_atomize_text_by_line, 105 .algo = &patience, 106 }; 107 108 /* Directly force Patience as a first divider of the source file. */ 109 static const struct diff_config diff_config_no_algo = { 110 .atomize_func = diff_atomize_text_by_line, 111 }; 112 113 const char * 114 format_label(const char *oldlabel, struct stat *stb) 115 { 116 const char *time_format = "%Y-%m-%d %H:%M:%S"; 117 char *newlabel; 118 char buf[256]; 119 char end[10]; 120 struct tm tm, *tm_ptr; 121 int nsec = stb->st_mtim.tv_nsec; 122 size_t newlabellen, timelen, endlen; 123 tm_ptr = localtime_r(&stb->st_mtime, &tm); 124 125 timelen = strftime(buf, 256, time_format, tm_ptr); 126 endlen = strftime(end, 10, "%z", tm_ptr); 127 128 /* 129 * The new label is the length of the time, old label, timezone, 130 * 9 characters for nanoseconds, and 4 characters for a period 131 * and for formatting. 132 */ 133 newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4; 134 newlabel = calloc(newlabellen, sizeof(char)); 135 136 snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n", 137 oldlabel, buf, nsec, end); 138 139 return newlabel; 140 } 141 142 int 143 diffreg_new(char *file1, char *file2, int flags, int capsicum) 144 { 145 char *str1, *str2; 146 FILE *f1, *f2; 147 struct stat st1, st2; 148 struct diff_input_info info; 149 struct diff_data left = {}, right = {}; 150 struct diff_result *result = NULL; 151 bool force_text, have_binary; 152 int rc, atomizer_flags, rflags, diff_flags = 0; 153 int context_lines = diff_context; 154 const struct diff_config *cfg; 155 enum diffreg_algo algo; 156 cap_rights_t rights_ro; 157 158 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE; 159 160 switch (algo) { 161 default: 162 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE: 163 cfg = &diff_config_myers_then_myers_divide; 164 break; 165 case DIFFREG_ALGO_MYERS_THEN_PATIENCE: 166 cfg = &diff_config_myers_then_patience; 167 break; 168 case DIFFREG_ALGO_PATIENCE: 169 cfg = &diff_config_patience; 170 break; 171 case DIFFREG_ALGO_NONE: 172 cfg = &diff_config_no_algo; 173 break; 174 } 175 176 f1 = openfile(file1, &str1, &st1); 177 f2 = openfile(file2, &str2, &st2); 178 179 if (capsicum) { 180 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 181 if (caph_rights_limit(fileno(f1), &rights_ro) < 0) 182 err(2, "unable to limit rights on: %s", file1); 183 if (caph_rights_limit(fileno(f2), &rights_ro) < 0) 184 err(2, "unable to limit rights on: %s", file2); 185 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 186 /* stdin has already been limited */ 187 if (caph_limit_stderr() == -1) 188 err(2, "unable to limit stderr"); 189 if (caph_limit_stdout() == -1) 190 err(2, "unable to limit stdout"); 191 } else if (caph_limit_stdio() == -1) 192 err(2, "unable to limit stdio"); 193 caph_cache_catpages(); 194 caph_cache_tzdata(); 195 if (caph_enter() < 0) 196 err(2, "unable to enter capability mode"); 197 } 198 /* 199 * If we have been given a label use that for the paths, if not format 200 * the path with the files modification time. 201 */ 202 info.flags = 0; 203 info.left_path = (label[0] != NULL) ? 204 label[0] : format_label(file1, &stb1); 205 info.right_path = (label[1] != NULL) ? 206 label[1] : format_label(file2, &stb2); 207 208 if (flags & D_FORCEASCII) 209 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA; 210 if (flags & D_IGNOREBLANKS) 211 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE; 212 if (flags & D_PROTOTYPE) 213 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; 214 215 if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) { 216 rc = D_ERROR; 217 goto done; 218 } 219 if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) { 220 rc = D_ERROR; 221 goto done; 222 } 223 224 result = diff_main(cfg, &left, &right); 225 if (result->rc != DIFF_RC_OK) { 226 rc = D_ERROR; 227 status |= 2; 228 goto done; 229 } 230 /* 231 * If there wasn't an error, but we don't have any printable chunks 232 * then the files must match. 233 */ 234 if (!diff_result_contains_printable_chunks(result)) { 235 rc = D_SAME; 236 goto done; 237 } 238 239 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags); 240 rflags = (result->left->root->diff_flags | result->right->root->diff_flags); 241 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA); 242 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA); 243 244 if (have_binary && !force_text) { 245 rc = D_BINARY; 246 status |= 1; 247 goto done; 248 } 249 250 if (diff_format == D_NORMAL) { 251 rc = diff_output_plain(NULL, stdout, &info, result, false); 252 } else if (diff_format == D_EDIT) { 253 rc = diff_output_edscript(NULL, stdout, &info, result); 254 } else { 255 rc = diff_output_unidiff(NULL, stdout, &info, result, 256 context_lines); 257 } 258 if (rc != DIFF_RC_OK) { 259 rc = D_ERROR; 260 status |= 2; 261 } else { 262 rc = D_DIFFER; 263 status |= 1; 264 } 265 done: 266 diff_result_free(result); 267 diff_data_free(&left); 268 diff_data_free(&right); 269 if (str1) 270 munmap(str1, st1.st_size); 271 if (str2) 272 munmap(str2, st2.st_size); 273 fclose(f1); 274 fclose(f2); 275 276 return rc; 277 } 278 279 FILE * 280 openfile(const char *path, char **p, struct stat *st) 281 { 282 FILE *f = NULL; 283 284 if (strcmp(path, "-") == 0) 285 f = stdin; 286 else 287 f = fopen(path, "r"); 288 289 if (f == NULL) 290 err(2, "%s", path); 291 292 if (fstat(fileno(f), st) == -1) 293 err(2, "%s", path); 294 295 #ifndef DIFF_NO_MMAP 296 *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); 297 if (*p == MAP_FAILED) 298 #endif 299 *p = NULL; /* fall back on file I/O */ 300 301 return f; 302 } 303 304 bool 305 can_libdiff(int flags) 306 { 307 /* We can't use fifos with libdiff yet */ 308 if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode)) 309 return false; 310 311 /* Is this one of the supported input/output modes for diffreg_new? */ 312 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) && 313 ignore_pats == NULL && ( 314 diff_format == D_NORMAL || 315 #if 0 316 diff_format == D_EDIT || 317 #endif 318 diff_format == D_UNIFIED) && 319 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) { 320 return true; 321 } 322 323 /* Fallback to using stone. */ 324 return false; 325 } 326