1*59c8e88eSDag-Erling Smørgrav /* Commandline diff utility to test diff implementations. */ 2*59c8e88eSDag-Erling Smørgrav /* 3*59c8e88eSDag-Erling Smørgrav * Copyright (c) 2018 Martin Pieuchot 4*59c8e88eSDag-Erling Smørgrav * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> 5*59c8e88eSDag-Erling Smørgrav * 6*59c8e88eSDag-Erling Smørgrav * Permission to use, copy, modify, and distribute this software for any 7*59c8e88eSDag-Erling Smørgrav * purpose with or without fee is hereby granted, provided that the above 8*59c8e88eSDag-Erling Smørgrav * copyright notice and this permission notice appear in all copies. 9*59c8e88eSDag-Erling Smørgrav * 10*59c8e88eSDag-Erling Smørgrav * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11*59c8e88eSDag-Erling Smørgrav * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*59c8e88eSDag-Erling Smørgrav * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13*59c8e88eSDag-Erling Smørgrav * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*59c8e88eSDag-Erling Smørgrav * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*59c8e88eSDag-Erling Smørgrav * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*59c8e88eSDag-Erling Smørgrav * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17*59c8e88eSDag-Erling Smørgrav */ 18*59c8e88eSDag-Erling Smørgrav 19*59c8e88eSDag-Erling Smørgrav #include <sys/mman.h> 20*59c8e88eSDag-Erling Smørgrav #include <sys/stat.h> 21*59c8e88eSDag-Erling Smørgrav #include <sys/types.h> 22*59c8e88eSDag-Erling Smørgrav 23*59c8e88eSDag-Erling Smørgrav #include <err.h> 24*59c8e88eSDag-Erling Smørgrav #include <fcntl.h> 25*59c8e88eSDag-Erling Smørgrav #include <stdint.h> 26*59c8e88eSDag-Erling Smørgrav #include <stdio.h> 27*59c8e88eSDag-Erling Smørgrav #include <stdlib.h> 28*59c8e88eSDag-Erling Smørgrav #include <stdbool.h> 29*59c8e88eSDag-Erling Smørgrav #include <string.h> 30*59c8e88eSDag-Erling Smørgrav #include <unistd.h> 31*59c8e88eSDag-Erling Smørgrav 32*59c8e88eSDag-Erling Smørgrav #include <arraylist.h> 33*59c8e88eSDag-Erling Smørgrav #include <diff_main.h> 34*59c8e88eSDag-Erling Smørgrav #include <diff_output.h> 35*59c8e88eSDag-Erling Smørgrav 36*59c8e88eSDag-Erling Smørgrav enum diffreg_algo { 37*59c8e88eSDag-Erling Smørgrav DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0, 38*59c8e88eSDag-Erling Smørgrav DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1, 39*59c8e88eSDag-Erling Smørgrav DIFFREG_ALGO_PATIENCE = 2, 40*59c8e88eSDag-Erling Smørgrav DIFFREG_ALGO_NONE = 3, 41*59c8e88eSDag-Erling Smørgrav }; 42*59c8e88eSDag-Erling Smørgrav 43*59c8e88eSDag-Erling Smørgrav __dead void usage(void); 44*59c8e88eSDag-Erling Smørgrav int diffreg(char *, char *, enum diffreg_algo, bool, bool, bool, 45*59c8e88eSDag-Erling Smørgrav int, bool); 46*59c8e88eSDag-Erling Smørgrav FILE * openfile(const char *, char **, struct stat *); 47*59c8e88eSDag-Erling Smørgrav 48*59c8e88eSDag-Erling Smørgrav __dead void 49*59c8e88eSDag-Erling Smørgrav usage(void) 50*59c8e88eSDag-Erling Smørgrav { 51*59c8e88eSDag-Erling Smørgrav fprintf(stderr, 52*59c8e88eSDag-Erling Smørgrav "usage: %s [-apPQTwe] [-U n] file1 file2\n" 53*59c8e88eSDag-Erling Smørgrav "\n" 54*59c8e88eSDag-Erling Smørgrav " -a Treat input as ASCII even if binary data is detected\n" 55*59c8e88eSDag-Erling Smørgrav " -p Show function prototypes in hunk headers\n" 56*59c8e88eSDag-Erling Smørgrav " -P Use Patience Diff (slower but often nicer)\n" 57*59c8e88eSDag-Erling Smørgrav " -Q Use forward-Myers for small files, otherwise Patience\n" 58*59c8e88eSDag-Erling Smørgrav " -T Trivial algo: detect similar start and end only\n" 59*59c8e88eSDag-Erling Smørgrav " -w Ignore Whitespace\n" 60*59c8e88eSDag-Erling Smørgrav " -U n Number of Context Lines\n" 61*59c8e88eSDag-Erling Smørgrav " -e Produce ed script output\n" 62*59c8e88eSDag-Erling Smørgrav , getprogname()); 63*59c8e88eSDag-Erling Smørgrav exit(1); 64*59c8e88eSDag-Erling Smørgrav } 65*59c8e88eSDag-Erling Smørgrav 66*59c8e88eSDag-Erling Smørgrav int 67*59c8e88eSDag-Erling Smørgrav main(int argc, char *argv[]) 68*59c8e88eSDag-Erling Smørgrav { 69*59c8e88eSDag-Erling Smørgrav int ch, rc; 70*59c8e88eSDag-Erling Smørgrav bool force_text = false; 71*59c8e88eSDag-Erling Smørgrav bool ignore_whitespace = false; 72*59c8e88eSDag-Erling Smørgrav bool show_function_prototypes = false; 73*59c8e88eSDag-Erling Smørgrav bool edscript = false; 74*59c8e88eSDag-Erling Smørgrav int context_lines = 3; 75*59c8e88eSDag-Erling Smørgrav enum diffreg_algo algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE; 76*59c8e88eSDag-Erling Smørgrav 77*59c8e88eSDag-Erling Smørgrav while ((ch = getopt(argc, argv, "apPQTwU:e")) != -1) { 78*59c8e88eSDag-Erling Smørgrav switch (ch) { 79*59c8e88eSDag-Erling Smørgrav case 'a': 80*59c8e88eSDag-Erling Smørgrav force_text = true; 81*59c8e88eSDag-Erling Smørgrav break; 82*59c8e88eSDag-Erling Smørgrav case 'p': 83*59c8e88eSDag-Erling Smørgrav show_function_prototypes = true; 84*59c8e88eSDag-Erling Smørgrav break; 85*59c8e88eSDag-Erling Smørgrav case 'P': 86*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_PATIENCE; 87*59c8e88eSDag-Erling Smørgrav break; 88*59c8e88eSDag-Erling Smørgrav case 'Q': 89*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_MYERS_THEN_PATIENCE; 90*59c8e88eSDag-Erling Smørgrav break; 91*59c8e88eSDag-Erling Smørgrav case 'T': 92*59c8e88eSDag-Erling Smørgrav algo = DIFFREG_ALGO_NONE; 93*59c8e88eSDag-Erling Smørgrav break; 94*59c8e88eSDag-Erling Smørgrav case 'w': 95*59c8e88eSDag-Erling Smørgrav ignore_whitespace = true; 96*59c8e88eSDag-Erling Smørgrav break; 97*59c8e88eSDag-Erling Smørgrav case 'U': 98*59c8e88eSDag-Erling Smørgrav context_lines = atoi(optarg); 99*59c8e88eSDag-Erling Smørgrav break; 100*59c8e88eSDag-Erling Smørgrav case 'e': 101*59c8e88eSDag-Erling Smørgrav edscript = true; 102*59c8e88eSDag-Erling Smørgrav break; 103*59c8e88eSDag-Erling Smørgrav default: 104*59c8e88eSDag-Erling Smørgrav usage(); 105*59c8e88eSDag-Erling Smørgrav } 106*59c8e88eSDag-Erling Smørgrav } 107*59c8e88eSDag-Erling Smørgrav 108*59c8e88eSDag-Erling Smørgrav argc -= optind; 109*59c8e88eSDag-Erling Smørgrav argv += optind; 110*59c8e88eSDag-Erling Smørgrav 111*59c8e88eSDag-Erling Smørgrav if (argc != 2) 112*59c8e88eSDag-Erling Smørgrav usage(); 113*59c8e88eSDag-Erling Smørgrav 114*59c8e88eSDag-Erling Smørgrav rc = diffreg(argv[0], argv[1], algo, force_text, ignore_whitespace, 115*59c8e88eSDag-Erling Smørgrav show_function_prototypes, context_lines, edscript); 116*59c8e88eSDag-Erling Smørgrav if (rc != DIFF_RC_OK) { 117*59c8e88eSDag-Erling Smørgrav fprintf(stderr, "diff: %s\n", strerror(rc)); 118*59c8e88eSDag-Erling Smørgrav return 1; 119*59c8e88eSDag-Erling Smørgrav } 120*59c8e88eSDag-Erling Smørgrav return 0; 121*59c8e88eSDag-Erling Smørgrav } 122*59c8e88eSDag-Erling Smørgrav 123*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience; 124*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide; 125*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience; 126*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide; 127*59c8e88eSDag-Erling Smørgrav 128*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience = (struct diff_algo_config){ 129*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers, 130*59c8e88eSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int), 131*59c8e88eSDag-Erling Smørgrav .fallback_algo = &patience, 132*59c8e88eSDag-Erling Smørgrav }; 133*59c8e88eSDag-Erling Smørgrav 134*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide = 135*59c8e88eSDag-Erling Smørgrav (struct diff_algo_config){ 136*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers, 137*59c8e88eSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int), 138*59c8e88eSDag-Erling Smørgrav .fallback_algo = &myers_divide, 139*59c8e88eSDag-Erling Smørgrav }; 140*59c8e88eSDag-Erling Smørgrav 141*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config patience = (struct diff_algo_config){ 142*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_patience, 143*59c8e88eSDag-Erling Smørgrav /* After subdivision, do Patience again: */ 144*59c8e88eSDag-Erling Smørgrav .inner_algo = &patience, 145*59c8e88eSDag-Erling Smørgrav /* If subdivision failed, do Myers Divide et Impera: */ 146*59c8e88eSDag-Erling Smørgrav .fallback_algo = &myers_then_myers_divide, 147*59c8e88eSDag-Erling Smørgrav }; 148*59c8e88eSDag-Erling Smørgrav 149*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config myers_divide = (struct diff_algo_config){ 150*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_myers_divide, 151*59c8e88eSDag-Erling Smørgrav /* When division succeeded, start from the top: */ 152*59c8e88eSDag-Erling Smørgrav .inner_algo = &myers_then_myers_divide, 153*59c8e88eSDag-Erling Smørgrav /* (fallback_algo = NULL implies diff_algo_none). */ 154*59c8e88eSDag-Erling Smørgrav }; 155*59c8e88eSDag-Erling Smørgrav 156*59c8e88eSDag-Erling Smørgrav const struct diff_algo_config no_algo = (struct diff_algo_config){ 157*59c8e88eSDag-Erling Smørgrav .impl = diff_algo_none, 158*59c8e88eSDag-Erling Smørgrav }; 159*59c8e88eSDag-Erling Smørgrav 160*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first 161*59c8e88eSDag-Erling Smørgrav * do a Myers-divide. */ 162*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_myers_divide = { 163*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line, 164*59c8e88eSDag-Erling Smørgrav .algo = &myers_then_myers_divide, 165*59c8e88eSDag-Erling Smørgrav }; 166*59c8e88eSDag-Erling Smørgrav 167*59c8e88eSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first 168*59c8e88eSDag-Erling Smørgrav * do a Patience. */ 169*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_patience = { 170*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line, 171*59c8e88eSDag-Erling Smørgrav .algo = &myers_then_patience, 172*59c8e88eSDag-Erling Smørgrav }; 173*59c8e88eSDag-Erling Smørgrav 174*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */ 175*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_patience = { 176*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line, 177*59c8e88eSDag-Erling Smørgrav .algo = &patience, 178*59c8e88eSDag-Erling Smørgrav }; 179*59c8e88eSDag-Erling Smørgrav 180*59c8e88eSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */ 181*59c8e88eSDag-Erling Smørgrav const struct diff_config diff_config_no_algo = { 182*59c8e88eSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line, 183*59c8e88eSDag-Erling Smørgrav }; 184*59c8e88eSDag-Erling Smørgrav 185*59c8e88eSDag-Erling Smørgrav int 186*59c8e88eSDag-Erling Smørgrav diffreg(char *file1, char *file2, enum diffreg_algo algo, bool force_text, 187*59c8e88eSDag-Erling Smørgrav bool ignore_whitespace, bool show_function_prototypes, int context_lines, 188*59c8e88eSDag-Erling Smørgrav bool edscript) 189*59c8e88eSDag-Erling Smørgrav { 190*59c8e88eSDag-Erling Smørgrav char *str1, *str2; 191*59c8e88eSDag-Erling Smørgrav FILE *f1, *f2; 192*59c8e88eSDag-Erling Smørgrav struct stat st1, st2; 193*59c8e88eSDag-Erling Smørgrav struct diff_input_info info = { 194*59c8e88eSDag-Erling Smørgrav .left_path = file1, 195*59c8e88eSDag-Erling Smørgrav .right_path = file2, 196*59c8e88eSDag-Erling Smørgrav }; 197*59c8e88eSDag-Erling Smørgrav struct diff_data left = {}, right = {}; 198*59c8e88eSDag-Erling Smørgrav struct diff_result *result = NULL; 199*59c8e88eSDag-Erling Smørgrav int rc; 200*59c8e88eSDag-Erling Smørgrav const struct diff_config *cfg; 201*59c8e88eSDag-Erling Smørgrav int diff_flags = 0; 202*59c8e88eSDag-Erling Smørgrav 203*59c8e88eSDag-Erling Smørgrav switch (algo) { 204*59c8e88eSDag-Erling Smørgrav default: 205*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE: 206*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_myers_then_myers_divide; 207*59c8e88eSDag-Erling Smørgrav break; 208*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_PATIENCE: 209*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_myers_then_patience; 210*59c8e88eSDag-Erling Smørgrav break; 211*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_PATIENCE: 212*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_patience; 213*59c8e88eSDag-Erling Smørgrav break; 214*59c8e88eSDag-Erling Smørgrav case DIFFREG_ALGO_NONE: 215*59c8e88eSDag-Erling Smørgrav cfg = &diff_config_no_algo; 216*59c8e88eSDag-Erling Smørgrav break; 217*59c8e88eSDag-Erling Smørgrav } 218*59c8e88eSDag-Erling Smørgrav 219*59c8e88eSDag-Erling Smørgrav f1 = openfile(file1, &str1, &st1); 220*59c8e88eSDag-Erling Smørgrav f2 = openfile(file2, &str2, &st2); 221*59c8e88eSDag-Erling Smørgrav 222*59c8e88eSDag-Erling Smørgrav if (force_text) 223*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA; 224*59c8e88eSDag-Erling Smørgrav if (ignore_whitespace) 225*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE; 226*59c8e88eSDag-Erling Smørgrav if (show_function_prototypes) 227*59c8e88eSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES; 228*59c8e88eSDag-Erling Smørgrav 229*59c8e88eSDag-Erling Smørgrav rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags); 230*59c8e88eSDag-Erling Smørgrav if (rc) 231*59c8e88eSDag-Erling Smørgrav goto done; 232*59c8e88eSDag-Erling Smørgrav rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags); 233*59c8e88eSDag-Erling Smørgrav if (rc) 234*59c8e88eSDag-Erling Smørgrav goto done; 235*59c8e88eSDag-Erling Smørgrav 236*59c8e88eSDag-Erling Smørgrav result = diff_main(cfg, &left, &right); 237*59c8e88eSDag-Erling Smørgrav #if 0 238*59c8e88eSDag-Erling Smørgrav rc = diff_output_plain(stdout, &info, result); 239*59c8e88eSDag-Erling Smørgrav #else 240*59c8e88eSDag-Erling Smørgrav if (edscript) 241*59c8e88eSDag-Erling Smørgrav rc = diff_output_edscript(NULL, stdout, &info, result); 242*59c8e88eSDag-Erling Smørgrav else { 243*59c8e88eSDag-Erling Smørgrav rc = diff_output_unidiff(NULL, stdout, &info, result, 244*59c8e88eSDag-Erling Smørgrav context_lines); 245*59c8e88eSDag-Erling Smørgrav } 246*59c8e88eSDag-Erling Smørgrav #endif 247*59c8e88eSDag-Erling Smørgrav done: 248*59c8e88eSDag-Erling Smørgrav diff_result_free(result); 249*59c8e88eSDag-Erling Smørgrav diff_data_free(&left); 250*59c8e88eSDag-Erling Smørgrav diff_data_free(&right); 251*59c8e88eSDag-Erling Smørgrav if (str1) 252*59c8e88eSDag-Erling Smørgrav munmap(str1, st1.st_size); 253*59c8e88eSDag-Erling Smørgrav if (str2) 254*59c8e88eSDag-Erling Smørgrav munmap(str2, st2.st_size); 255*59c8e88eSDag-Erling Smørgrav fclose(f1); 256*59c8e88eSDag-Erling Smørgrav fclose(f2); 257*59c8e88eSDag-Erling Smørgrav 258*59c8e88eSDag-Erling Smørgrav return rc; 259*59c8e88eSDag-Erling Smørgrav } 260*59c8e88eSDag-Erling Smørgrav 261*59c8e88eSDag-Erling Smørgrav FILE * 262*59c8e88eSDag-Erling Smørgrav openfile(const char *path, char **p, struct stat *st) 263*59c8e88eSDag-Erling Smørgrav { 264*59c8e88eSDag-Erling Smørgrav FILE *f = NULL; 265*59c8e88eSDag-Erling Smørgrav 266*59c8e88eSDag-Erling Smørgrav f = fopen(path, "r"); 267*59c8e88eSDag-Erling Smørgrav if (f == NULL) 268*59c8e88eSDag-Erling Smørgrav err(2, "%s", path); 269*59c8e88eSDag-Erling Smørgrav 270*59c8e88eSDag-Erling Smørgrav if (fstat(fileno(f), st) == -1) 271*59c8e88eSDag-Erling Smørgrav err(2, "%s", path); 272*59c8e88eSDag-Erling Smørgrav 273*59c8e88eSDag-Erling Smørgrav #ifndef DIFF_NO_MMAP 274*59c8e88eSDag-Erling Smørgrav *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); 275*59c8e88eSDag-Erling Smørgrav if (*p == MAP_FAILED) 276*59c8e88eSDag-Erling Smørgrav #endif 277*59c8e88eSDag-Erling Smørgrav *p = NULL; /* fall back on file I/O */ 278*59c8e88eSDag-Erling Smørgrav 279*59c8e88eSDag-Erling Smørgrav return f; 280*59c8e88eSDag-Erling Smørgrav } 281