118fd37a7SXin LI /* Shared definitions for GNU DIFF 218fd37a7SXin LI 318fd37a7SXin LI Copyright (C) 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1998, 2001, 418fd37a7SXin LI 2002, 2004 Free Software Foundation, Inc. 518fd37a7SXin LI 618fd37a7SXin LI This file is part of GNU DIFF. 718fd37a7SXin LI 818fd37a7SXin LI GNU DIFF is free software; you can redistribute it and/or modify 918fd37a7SXin LI it under the terms of the GNU General Public License as published by 1018fd37a7SXin LI the Free Software Foundation; either version 2, or (at your option) 1118fd37a7SXin LI any later version. 1218fd37a7SXin LI 1318fd37a7SXin LI GNU DIFF is distributed in the hope that it will be useful, 1418fd37a7SXin LI but WITHOUT ANY WARRANTY; without even the implied warranty of 1518fd37a7SXin LI MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1618fd37a7SXin LI GNU General Public License for more details. 1718fd37a7SXin LI 1818fd37a7SXin LI You should have received a copy of the GNU General Public License 1918fd37a7SXin LI along with this program; see the file COPYING. 2018fd37a7SXin LI If not, write to the Free Software Foundation, 2118fd37a7SXin LI 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 2218fd37a7SXin LI 2318fd37a7SXin LI #include "system.h" 2418fd37a7SXin LI #include <regex.h> 2518fd37a7SXin LI #include <stdio.h> 2618fd37a7SXin LI #include <unlocked-io.h> 2718fd37a7SXin LI 2818fd37a7SXin LI /* What kind of changes a hunk contains. */ 2918fd37a7SXin LI enum changes 3018fd37a7SXin LI { 3118fd37a7SXin LI /* No changes: lines common to both files. */ 3218fd37a7SXin LI UNCHANGED, 3318fd37a7SXin LI 3418fd37a7SXin LI /* Deletes only: lines taken from just the first file. */ 3518fd37a7SXin LI OLD, 3618fd37a7SXin LI 3718fd37a7SXin LI /* Inserts only: lines taken from just the second file. */ 3818fd37a7SXin LI NEW, 3918fd37a7SXin LI 4018fd37a7SXin LI /* Both deletes and inserts: a hunk containing both old and new lines. */ 4118fd37a7SXin LI CHANGED 4218fd37a7SXin LI }; 4318fd37a7SXin LI 4418fd37a7SXin LI /* Variables for command line options */ 4518fd37a7SXin LI 4618fd37a7SXin LI #ifndef GDIFF_MAIN 4718fd37a7SXin LI # define XTERN extern 4818fd37a7SXin LI #else 4918fd37a7SXin LI # define XTERN 5018fd37a7SXin LI #endif 5118fd37a7SXin LI 5218fd37a7SXin LI enum output_style 5318fd37a7SXin LI { 5418fd37a7SXin LI /* No output style specified. */ 5518fd37a7SXin LI OUTPUT_UNSPECIFIED, 5618fd37a7SXin LI 5718fd37a7SXin LI /* Default output style. */ 5818fd37a7SXin LI OUTPUT_NORMAL, 5918fd37a7SXin LI 6018fd37a7SXin LI /* Output the differences with lines of context before and after (-c). */ 6118fd37a7SXin LI OUTPUT_CONTEXT, 6218fd37a7SXin LI 6318fd37a7SXin LI /* Output the differences in a unified context diff format (-u). */ 6418fd37a7SXin LI OUTPUT_UNIFIED, 6518fd37a7SXin LI 6618fd37a7SXin LI /* Output the differences as commands suitable for `ed' (-e). */ 6718fd37a7SXin LI OUTPUT_ED, 6818fd37a7SXin LI 6918fd37a7SXin LI /* Output the diff as a forward ed script (-f). */ 7018fd37a7SXin LI OUTPUT_FORWARD_ED, 7118fd37a7SXin LI 7218fd37a7SXin LI /* Like -f, but output a count of changed lines in each "command" (-n). */ 7318fd37a7SXin LI OUTPUT_RCS, 7418fd37a7SXin LI 7518fd37a7SXin LI /* Output merged #ifdef'd file (-D). */ 7618fd37a7SXin LI OUTPUT_IFDEF, 7718fd37a7SXin LI 7818fd37a7SXin LI /* Output sdiff style (-y). */ 7918fd37a7SXin LI OUTPUT_SDIFF 8018fd37a7SXin LI }; 8118fd37a7SXin LI 8218fd37a7SXin LI /* True for output styles that are robust, 8318fd37a7SXin LI i.e. can handle a file that ends in a non-newline. */ 8418fd37a7SXin LI #define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED) 8518fd37a7SXin LI 8618fd37a7SXin LI XTERN enum output_style output_style; 8718fd37a7SXin LI 8818fd37a7SXin LI /* Nonzero if output cannot be generated for identical files. */ 8918fd37a7SXin LI XTERN bool no_diff_means_no_output; 9018fd37a7SXin LI 9118fd37a7SXin LI /* Number of lines of context to show in each set of diffs. 9218fd37a7SXin LI This is zero when context is not to be shown. */ 9318fd37a7SXin LI XTERN lin context; 9418fd37a7SXin LI 9518fd37a7SXin LI /* Consider all files as text files (-a). 9618fd37a7SXin LI Don't interpret codes over 0177 as implying a "binary file". */ 9718fd37a7SXin LI XTERN bool text; 9818fd37a7SXin LI 9918fd37a7SXin LI /* Number of lines to keep in identical prefix and suffix. */ 10018fd37a7SXin LI XTERN lin horizon_lines; 10118fd37a7SXin LI 10218fd37a7SXin LI /* The significance of white space during comparisons. */ 10318fd37a7SXin LI XTERN enum 10418fd37a7SXin LI { 10518fd37a7SXin LI /* All white space is significant (the default). */ 10618fd37a7SXin LI IGNORE_NO_WHITE_SPACE, 10718fd37a7SXin LI 10818fd37a7SXin LI /* Ignore changes due to tab expansion (-E). */ 10918fd37a7SXin LI IGNORE_TAB_EXPANSION, 11018fd37a7SXin LI 11118fd37a7SXin LI /* Ignore changes in horizontal white space (-b). */ 11218fd37a7SXin LI IGNORE_SPACE_CHANGE, 11318fd37a7SXin LI 11418fd37a7SXin LI /* Ignore all horizontal white space (-w). */ 11518fd37a7SXin LI IGNORE_ALL_SPACE 11618fd37a7SXin LI } ignore_white_space; 11718fd37a7SXin LI 11818fd37a7SXin LI /* Ignore changes that affect only blank lines (-B). */ 11918fd37a7SXin LI XTERN bool ignore_blank_lines; 12018fd37a7SXin LI 12118fd37a7SXin LI /* Files can be compared byte-by-byte, as if they were binary. 12218fd37a7SXin LI This depends on various options. */ 12318fd37a7SXin LI XTERN bool files_can_be_treated_as_binary; 12418fd37a7SXin LI 12518fd37a7SXin LI /* Ignore differences in case of letters (-i). */ 12618fd37a7SXin LI XTERN bool ignore_case; 12718fd37a7SXin LI 12818fd37a7SXin LI /* Ignore differences in case of letters in file names. */ 12918fd37a7SXin LI XTERN bool ignore_file_name_case; 13018fd37a7SXin LI 13118fd37a7SXin LI /* File labels for `-c' output headers (--label). */ 13218fd37a7SXin LI XTERN char *file_label[2]; 13318fd37a7SXin LI 13418fd37a7SXin LI /* Regexp to identify function-header lines (-F). */ 13518fd37a7SXin LI XTERN struct re_pattern_buffer function_regexp; 13618fd37a7SXin LI 13718fd37a7SXin LI /* Ignore changes that affect only lines matching this regexp (-I). */ 13818fd37a7SXin LI XTERN struct re_pattern_buffer ignore_regexp; 13918fd37a7SXin LI 14018fd37a7SXin LI /* Say only whether files differ, not how (-q). */ 14118fd37a7SXin LI XTERN bool brief; 14218fd37a7SXin LI 14318fd37a7SXin LI /* Expand tabs in the output so the text lines up properly 14418fd37a7SXin LI despite the characters added to the front of each line (-t). */ 14518fd37a7SXin LI XTERN bool expand_tabs; 14618fd37a7SXin LI 14718fd37a7SXin LI /* Number of columns between tab stops. */ 14818fd37a7SXin LI XTERN size_t tabsize; 14918fd37a7SXin LI 15018fd37a7SXin LI /* Use a tab in the output, rather than a space, before the text of an 15118fd37a7SXin LI input line, so as to keep the proper alignment in the input line 15218fd37a7SXin LI without changing the characters in it (-T). */ 15318fd37a7SXin LI XTERN bool initial_tab; 15418fd37a7SXin LI 15518fd37a7SXin LI /* Remove trailing carriage returns from input. */ 15618fd37a7SXin LI XTERN bool strip_trailing_cr; 15718fd37a7SXin LI 15818fd37a7SXin LI /* In directory comparison, specify file to start with (-S). 15918fd37a7SXin LI This is used for resuming an aborted comparison. 16018fd37a7SXin LI All file names less than this name are ignored. */ 16118fd37a7SXin LI XTERN char const *starting_file; 16218fd37a7SXin LI 16318fd37a7SXin LI /* Pipe each file's output through pr (-l). */ 16418fd37a7SXin LI XTERN bool paginate; 16518fd37a7SXin LI 16618fd37a7SXin LI /* Line group formats for unchanged, old, new, and changed groups. */ 16718fd37a7SXin LI XTERN char const *group_format[CHANGED + 1]; 16818fd37a7SXin LI 16918fd37a7SXin LI /* Line formats for unchanged, old, and new lines. */ 17018fd37a7SXin LI XTERN char const *line_format[NEW + 1]; 17118fd37a7SXin LI 17218fd37a7SXin LI /* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */ 17318fd37a7SXin LI XTERN bool sdiff_merge_assist; 17418fd37a7SXin LI 17518fd37a7SXin LI /* Tell OUTPUT_SDIFF to show only the left version of common lines. */ 17618fd37a7SXin LI XTERN bool left_column; 17718fd37a7SXin LI 17818fd37a7SXin LI /* Tell OUTPUT_SDIFF to not show common lines. */ 17918fd37a7SXin LI XTERN bool suppress_common_lines; 18018fd37a7SXin LI 18118fd37a7SXin LI /* The half line width and column 2 offset for OUTPUT_SDIFF. */ 18218fd37a7SXin LI XTERN size_t sdiff_half_width; 18318fd37a7SXin LI XTERN size_t sdiff_column2_offset; 18418fd37a7SXin LI 18518fd37a7SXin LI /* String containing all the command options diff received, 18618fd37a7SXin LI with spaces between and at the beginning but none at the end. 18718fd37a7SXin LI If there were no options given, this string is empty. */ 18818fd37a7SXin LI XTERN char *switch_string; 18918fd37a7SXin LI 19018fd37a7SXin LI /* Use heuristics for better speed with large files with a small 19118fd37a7SXin LI density of changes. */ 19218fd37a7SXin LI XTERN bool speed_large_files; 19318fd37a7SXin LI 19418fd37a7SXin LI /* Patterns that match file names to be excluded. */ 19518fd37a7SXin LI XTERN struct exclude *excluded; 19618fd37a7SXin LI 19718fd37a7SXin LI /* Don't discard lines. This makes things slower (sometimes much 19818fd37a7SXin LI slower) but will find a guaranteed minimal set of changes. */ 19918fd37a7SXin LI XTERN bool minimal; 20018fd37a7SXin LI 20118fd37a7SXin LI /* Name of program the user invoked (for error messages). */ 20218fd37a7SXin LI XTERN char *program_name; 20318fd37a7SXin LI 20418fd37a7SXin LI /* The strftime format to use for time strings. */ 20518fd37a7SXin LI XTERN char const *time_format; 20618fd37a7SXin LI 20718fd37a7SXin LI /* The result of comparison is an "edit script": a chain of `struct change'. 20818fd37a7SXin LI Each `struct change' represents one place where some lines are deleted 20918fd37a7SXin LI and some are inserted. 21018fd37a7SXin LI 21118fd37a7SXin LI LINE0 and LINE1 are the first affected lines in the two files (origin 0). 21218fd37a7SXin LI DELETED is the number of lines deleted here from file 0. 21318fd37a7SXin LI INSERTED is the number of lines inserted here in file 1. 21418fd37a7SXin LI 21518fd37a7SXin LI If DELETED is 0 then LINE0 is the number of the line before 21618fd37a7SXin LI which the insertion was done; vice versa for INSERTED and LINE1. */ 21718fd37a7SXin LI 21818fd37a7SXin LI struct change 21918fd37a7SXin LI { 22018fd37a7SXin LI struct change *link; /* Previous or next edit command */ 22118fd37a7SXin LI lin inserted; /* # lines of file 1 changed here. */ 22218fd37a7SXin LI lin deleted; /* # lines of file 0 changed here. */ 22318fd37a7SXin LI lin line0; /* Line number of 1st deleted line. */ 22418fd37a7SXin LI lin line1; /* Line number of 1st inserted line. */ 22518fd37a7SXin LI bool ignore; /* Flag used in context.c. */ 22618fd37a7SXin LI }; 22718fd37a7SXin LI 22818fd37a7SXin LI /* Structures that describe the input files. */ 22918fd37a7SXin LI 23018fd37a7SXin LI /* Data on one input file being compared. */ 23118fd37a7SXin LI 23218fd37a7SXin LI struct file_data { 23318fd37a7SXin LI int desc; /* File descriptor */ 23418fd37a7SXin LI char const *name; /* File name */ 23518fd37a7SXin LI struct stat stat; /* File status */ 23618fd37a7SXin LI 23718fd37a7SXin LI /* Buffer in which text of file is read. */ 23818fd37a7SXin LI word *buffer; 23918fd37a7SXin LI 24018fd37a7SXin LI /* Allocated size of buffer, in bytes. Always a multiple of 24118fd37a7SXin LI sizeof *buffer. */ 24218fd37a7SXin LI size_t bufsize; 24318fd37a7SXin LI 24418fd37a7SXin LI /* Number of valid bytes now in the buffer. */ 24518fd37a7SXin LI size_t buffered; 24618fd37a7SXin LI 24718fd37a7SXin LI /* Array of pointers to lines in the file. */ 24818fd37a7SXin LI char const **linbuf; 24918fd37a7SXin LI 25018fd37a7SXin LI /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines. 25118fd37a7SXin LI linebuf[linbuf_base ... buffered_lines - 1] are possibly differing. 25218fd37a7SXin LI linebuf[linbuf_base ... valid_lines - 1] contain valid data. 25318fd37a7SXin LI linebuf[linbuf_base ... alloc_lines - 1] are allocated. */ 25418fd37a7SXin LI lin linbuf_base, buffered_lines, valid_lines, alloc_lines; 25518fd37a7SXin LI 25618fd37a7SXin LI /* Pointer to end of prefix of this file to ignore when hashing. */ 25718fd37a7SXin LI char const *prefix_end; 25818fd37a7SXin LI 25918fd37a7SXin LI /* Count of lines in the prefix. 26018fd37a7SXin LI There are this many lines in the file before linbuf[0]. */ 26118fd37a7SXin LI lin prefix_lines; 26218fd37a7SXin LI 26318fd37a7SXin LI /* Pointer to start of suffix of this file to ignore when hashing. */ 26418fd37a7SXin LI char const *suffix_begin; 26518fd37a7SXin LI 26618fd37a7SXin LI /* Vector, indexed by line number, containing an equivalence code for 26718fd37a7SXin LI each line. It is this vector that is actually compared with that 26818fd37a7SXin LI of another file to generate differences. */ 26918fd37a7SXin LI lin *equivs; 27018fd37a7SXin LI 27118fd37a7SXin LI /* Vector, like the previous one except that 27218fd37a7SXin LI the elements for discarded lines have been squeezed out. */ 27318fd37a7SXin LI lin *undiscarded; 27418fd37a7SXin LI 27518fd37a7SXin LI /* Vector mapping virtual line numbers (not counting discarded lines) 27618fd37a7SXin LI to real ones (counting those lines). Both are origin-0. */ 27718fd37a7SXin LI lin *realindexes; 27818fd37a7SXin LI 27918fd37a7SXin LI /* Total number of nondiscarded lines. */ 28018fd37a7SXin LI lin nondiscarded_lines; 28118fd37a7SXin LI 28218fd37a7SXin LI /* Vector, indexed by real origin-0 line number, 28318fd37a7SXin LI containing 1 for a line that is an insertion or a deletion. 28418fd37a7SXin LI The results of comparison are stored here. */ 28518fd37a7SXin LI char *changed; 28618fd37a7SXin LI 28718fd37a7SXin LI /* 1 if file ends in a line with no final newline. */ 28818fd37a7SXin LI bool missing_newline; 28918fd37a7SXin LI 29018fd37a7SXin LI /* 1 if at end of file. */ 29118fd37a7SXin LI bool eof; 29218fd37a7SXin LI 29318fd37a7SXin LI /* 1 more than the maximum equivalence value used for this or its 29418fd37a7SXin LI sibling file. */ 29518fd37a7SXin LI lin equiv_max; 29618fd37a7SXin LI }; 29718fd37a7SXin LI 29818fd37a7SXin LI /* The file buffer, considered as an array of bytes rather than 29918fd37a7SXin LI as an array of words. */ 30018fd37a7SXin LI #define FILE_BUFFER(f) ((char *) (f)->buffer) 30118fd37a7SXin LI 30218fd37a7SXin LI /* Data on two input files being compared. */ 30318fd37a7SXin LI 30418fd37a7SXin LI struct comparison 30518fd37a7SXin LI { 30618fd37a7SXin LI struct file_data file[2]; 30718fd37a7SXin LI struct comparison const *parent; /* parent, if a recursive comparison */ 30818fd37a7SXin LI }; 30918fd37a7SXin LI 31018fd37a7SXin LI /* Describe the two files currently being compared. */ 31118fd37a7SXin LI 31218fd37a7SXin LI XTERN struct file_data files[2]; 31318fd37a7SXin LI 31418fd37a7SXin LI /* Stdio stream to output diffs to. */ 31518fd37a7SXin LI 31618fd37a7SXin LI XTERN FILE *outfile; 31718fd37a7SXin LI 31818fd37a7SXin LI /* Declare various functions. */ 31918fd37a7SXin LI 32018fd37a7SXin LI /* analyze.c */ 32118fd37a7SXin LI int diff_2_files (struct comparison *); 32218fd37a7SXin LI 32318fd37a7SXin LI /* context.c */ 32418fd37a7SXin LI void print_context_header (struct file_data[], bool); 32518fd37a7SXin LI void print_context_script (struct change *, bool); 32618fd37a7SXin LI 32718fd37a7SXin LI /* dir.c */ 32818fd37a7SXin LI int diff_dirs (struct comparison const *, int (*) (struct comparison const *, char const *, char const *)); 32918fd37a7SXin LI 33018fd37a7SXin LI /* ed.c */ 33118fd37a7SXin LI void print_ed_script (struct change *); 33218fd37a7SXin LI void pr_forward_ed_script (struct change *); 33318fd37a7SXin LI 33418fd37a7SXin LI /* ifdef.c */ 33518fd37a7SXin LI void print_ifdef_script (struct change *); 33618fd37a7SXin LI 33718fd37a7SXin LI /* io.c */ 33818fd37a7SXin LI void file_block_read (struct file_data *, size_t); 33918fd37a7SXin LI bool read_files (struct file_data[], bool); 34018fd37a7SXin LI 34118fd37a7SXin LI /* normal.c */ 34218fd37a7SXin LI void print_normal_script (struct change *); 34318fd37a7SXin LI 34418fd37a7SXin LI /* rcs.c */ 34518fd37a7SXin LI void print_rcs_script (struct change *); 34618fd37a7SXin LI 34718fd37a7SXin LI /* side.c */ 34818fd37a7SXin LI void print_sdiff_script (struct change *); 34918fd37a7SXin LI 35018fd37a7SXin LI /* util.c */ 35118fd37a7SXin LI extern char const change_letter[4]; 35218fd37a7SXin LI extern char const pr_program[]; 35318fd37a7SXin LI char *concat (char const *, char const *, char const *); 35418fd37a7SXin LI char *dir_file_pathname (char const *, char const *); 35518fd37a7SXin LI bool lines_differ (char const *, char const *); 35618fd37a7SXin LI lin translate_line_number (struct file_data const *, lin); 35718fd37a7SXin LI struct change *find_change (struct change *); 35818fd37a7SXin LI struct change *find_reverse_change (struct change *); 35918fd37a7SXin LI void *zalloc (size_t); 36018fd37a7SXin LI enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *); 36118fd37a7SXin LI void begin_output (void); 36218fd37a7SXin LI void debug_script (struct change *); 36318fd37a7SXin LI void fatal (char const *) __attribute__((noreturn)); 36418fd37a7SXin LI void finish_output (void); 36518fd37a7SXin LI void message (char const *, char const *, char const *); 36618fd37a7SXin LI void message5 (char const *, char const *, char const *, char const *, char const *); 36718fd37a7SXin LI void output_1_line (char const *, char const *, char const *, char const *); 36818fd37a7SXin LI void perror_with_name (char const *); 36918fd37a7SXin LI void pfatal_with_name (char const *) __attribute__((noreturn)); 37018fd37a7SXin LI void print_1_line (char const *, char const * const *); 37118fd37a7SXin LI void print_message_queue (void); 37218fd37a7SXin LI void print_number_range (char, struct file_data *, lin, lin); 37318fd37a7SXin LI void print_script (struct change *, struct change * (*) (struct change *), void (*) (struct change *)); 37418fd37a7SXin LI void setup_output (char const *, char const *, bool); 37518fd37a7SXin LI void translate_range (struct file_data const *, lin, lin, long int *, long int *); 376