118fd37a7SXin LI /* File I/O for GNU DIFF.
218fd37a7SXin LI
318fd37a7SXin LI Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
418fd37a7SXin LI 2004 Free Software Foundation, Inc.
518fd37a7SXin LI
618fd37a7SXin LI This file is part of GNU DIFF.
718fd37a7SXin LI
818fd37a7SXin LI GNU DIFF is free software; you can redistribute it and/or modify
918fd37a7SXin LI it under the terms of the GNU General Public License as published by
1018fd37a7SXin LI the Free Software Foundation; either version 2, or (at your option)
1118fd37a7SXin LI any later version.
1218fd37a7SXin LI
1318fd37a7SXin LI GNU DIFF is distributed in the hope that it will be useful,
1418fd37a7SXin LI but WITHOUT ANY WARRANTY; without even the implied warranty of
1518fd37a7SXin LI MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1618fd37a7SXin LI GNU General Public License for more details.
1718fd37a7SXin LI
1818fd37a7SXin LI You should have received a copy of the GNU General Public License
1918fd37a7SXin LI along with this program; see the file COPYING.
2018fd37a7SXin LI If not, write to the Free Software Foundation,
2118fd37a7SXin LI 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
2218fd37a7SXin LI
2318fd37a7SXin LI #include "diff.h"
2418fd37a7SXin LI #include <cmpbuf.h>
2518fd37a7SXin LI #include <file-type.h>
2618fd37a7SXin LI #include <setmode.h>
2718fd37a7SXin LI #include <xalloc.h>
2818fd37a7SXin LI
2918fd37a7SXin LI /* Rotate an unsigned value to the left. */
3018fd37a7SXin LI #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
3118fd37a7SXin LI
3218fd37a7SXin LI /* Given a hash value and a new character, return a new hash value. */
3318fd37a7SXin LI #define HASH(h, c) ((c) + ROL (h, 7))
3418fd37a7SXin LI
3518fd37a7SXin LI /* The type of a hash value. */
3618fd37a7SXin LI typedef size_t hash_value;
3718fd37a7SXin LI verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
3818fd37a7SXin LI
3918fd37a7SXin LI /* Lines are put into equivalence classes of lines that match in lines_differ.
4018fd37a7SXin LI Each equivalence class is represented by one of these structures,
4118fd37a7SXin LI but only while the classes are being computed.
4218fd37a7SXin LI Afterward, each class is represented by a number. */
4318fd37a7SXin LI struct equivclass
4418fd37a7SXin LI {
4518fd37a7SXin LI lin next; /* Next item in this bucket. */
4618fd37a7SXin LI hash_value hash; /* Hash of lines in this class. */
4718fd37a7SXin LI char const *line; /* A line that fits this class. */
4818fd37a7SXin LI size_t length; /* That line's length, not counting its newline. */
4918fd37a7SXin LI };
5018fd37a7SXin LI
5118fd37a7SXin LI /* Hash-table: array of buckets, each being a chain of equivalence classes.
5218fd37a7SXin LI buckets[-1] is reserved for incomplete lines. */
5318fd37a7SXin LI static lin *buckets;
5418fd37a7SXin LI
5518fd37a7SXin LI /* Number of buckets in the hash table array, not counting buckets[-1]. */
5618fd37a7SXin LI static size_t nbuckets;
5718fd37a7SXin LI
5818fd37a7SXin LI /* Array in which the equivalence classes are allocated.
5918fd37a7SXin LI The bucket-chains go through the elements in this array.
6018fd37a7SXin LI The number of an equivalence class is its index in this array. */
6118fd37a7SXin LI static struct equivclass *equivs;
6218fd37a7SXin LI
6318fd37a7SXin LI /* Index of first free element in the array `equivs'. */
6418fd37a7SXin LI static lin equivs_index;
6518fd37a7SXin LI
6618fd37a7SXin LI /* Number of elements allocated in the array `equivs'. */
6718fd37a7SXin LI static lin equivs_alloc;
6818fd37a7SXin LI
6918fd37a7SXin LI /* Read a block of data into a file buffer, checking for EOF and error. */
7018fd37a7SXin LI
7118fd37a7SXin LI void
file_block_read(struct file_data * current,size_t size)7218fd37a7SXin LI file_block_read (struct file_data *current, size_t size)
7318fd37a7SXin LI {
7418fd37a7SXin LI if (size && ! current->eof)
7518fd37a7SXin LI {
7618fd37a7SXin LI size_t s = block_read (current->desc,
7718fd37a7SXin LI FILE_BUFFER (current) + current->buffered, size);
7818fd37a7SXin LI if (s == SIZE_MAX)
7918fd37a7SXin LI pfatal_with_name (current->name);
8018fd37a7SXin LI current->buffered += s;
8118fd37a7SXin LI current->eof = s < size;
8218fd37a7SXin LI }
8318fd37a7SXin LI }
8418fd37a7SXin LI
8518fd37a7SXin LI /* Check for binary files and compare them for exact identity. */
8618fd37a7SXin LI
8718fd37a7SXin LI /* Return 1 if BUF contains a non text character.
8818fd37a7SXin LI SIZE is the number of characters in BUF. */
8918fd37a7SXin LI
9018fd37a7SXin LI #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
9118fd37a7SXin LI
9218fd37a7SXin LI /* Get ready to read the current file.
9318fd37a7SXin LI Return nonzero if SKIP_TEST is zero,
9418fd37a7SXin LI and if it appears to be a binary file. */
9518fd37a7SXin LI
9618fd37a7SXin LI static bool
sip(struct file_data * current,bool skip_test)9718fd37a7SXin LI sip (struct file_data *current, bool skip_test)
9818fd37a7SXin LI {
9918fd37a7SXin LI /* If we have a nonexistent file at this stage, treat it as empty. */
10018fd37a7SXin LI if (current->desc < 0)
10118fd37a7SXin LI {
10218fd37a7SXin LI /* Leave room for a sentinel. */
10318fd37a7SXin LI current->bufsize = sizeof (word);
10418fd37a7SXin LI current->buffer = xmalloc (current->bufsize);
10518fd37a7SXin LI }
10618fd37a7SXin LI else
10718fd37a7SXin LI {
10818fd37a7SXin LI current->bufsize = buffer_lcm (sizeof (word),
10918fd37a7SXin LI STAT_BLOCKSIZE (current->stat),
11018fd37a7SXin LI PTRDIFF_MAX - 2 * sizeof (word));
11118fd37a7SXin LI current->buffer = xmalloc (current->bufsize);
11218fd37a7SXin LI
11318fd37a7SXin LI if (! skip_test)
11418fd37a7SXin LI {
11518fd37a7SXin LI /* Check first part of file to see if it's a binary file. */
11618fd37a7SXin LI
11718fd37a7SXin LI bool was_binary = set_binary_mode (current->desc, true);
11818fd37a7SXin LI off_t buffered;
11918fd37a7SXin LI file_block_read (current, current->bufsize);
12018fd37a7SXin LI buffered = current->buffered;
12118fd37a7SXin LI
12218fd37a7SXin LI if (! was_binary)
12318fd37a7SXin LI {
12418fd37a7SXin LI /* Revert to text mode and seek back to the beginning to
12518fd37a7SXin LI reread the file. Use relative seek, since file
12618fd37a7SXin LI descriptors like stdin might not start at offset
12718fd37a7SXin LI zero. */
12818fd37a7SXin LI
12918fd37a7SXin LI if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
13018fd37a7SXin LI pfatal_with_name (current->name);
13118fd37a7SXin LI set_binary_mode (current->desc, false);
13218fd37a7SXin LI current->buffered = 0;
13318fd37a7SXin LI current->eof = false;
13418fd37a7SXin LI }
13518fd37a7SXin LI
13618fd37a7SXin LI return binary_file_p (current->buffer, buffered);
13718fd37a7SXin LI }
13818fd37a7SXin LI }
13918fd37a7SXin LI
14018fd37a7SXin LI current->buffered = 0;
14118fd37a7SXin LI current->eof = false;
14218fd37a7SXin LI return false;
14318fd37a7SXin LI }
14418fd37a7SXin LI
14518fd37a7SXin LI /* Slurp the rest of the current file completely into memory. */
14618fd37a7SXin LI
14718fd37a7SXin LI static void
slurp(struct file_data * current)14818fd37a7SXin LI slurp (struct file_data *current)
14918fd37a7SXin LI {
15018fd37a7SXin LI size_t cc;
15118fd37a7SXin LI
15218fd37a7SXin LI if (current->desc < 0)
15318fd37a7SXin LI {
15418fd37a7SXin LI /* The file is nonexistent. */
15518fd37a7SXin LI return;
15618fd37a7SXin LI }
15718fd37a7SXin LI
15818fd37a7SXin LI if (S_ISREG (current->stat.st_mode))
15918fd37a7SXin LI {
16018fd37a7SXin LI /* It's a regular file; slurp in the rest all at once. */
16118fd37a7SXin LI
16218fd37a7SXin LI /* Get the size out of the stat block.
16318fd37a7SXin LI Allocate just enough room for appended newline plus word sentinel,
16418fd37a7SXin LI plus word-alignment since we want the buffer word-aligned. */
16518fd37a7SXin LI size_t file_size = current->stat.st_size;
16618fd37a7SXin LI cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
16718fd37a7SXin LI if (file_size != current->stat.st_size || cc < file_size
16818fd37a7SXin LI || PTRDIFF_MAX <= cc)
16918fd37a7SXin LI xalloc_die ();
17018fd37a7SXin LI
17118fd37a7SXin LI if (current->bufsize < cc)
17218fd37a7SXin LI {
17318fd37a7SXin LI current->bufsize = cc;
17418fd37a7SXin LI current->buffer = xrealloc (current->buffer, cc);
17518fd37a7SXin LI }
17618fd37a7SXin LI
17718fd37a7SXin LI /* Try to read at least 1 more byte than the size indicates, to
17818fd37a7SXin LI detect whether the file is growing. This is a nicety for
17918fd37a7SXin LI users who run 'diff' on files while they are changing. */
18018fd37a7SXin LI
18118fd37a7SXin LI if (current->buffered <= file_size)
18218fd37a7SXin LI {
18318fd37a7SXin LI file_block_read (current, file_size + 1 - current->buffered);
18418fd37a7SXin LI if (current->buffered <= file_size)
18518fd37a7SXin LI return;
18618fd37a7SXin LI }
18718fd37a7SXin LI }
18818fd37a7SXin LI
18918fd37a7SXin LI /* It's not a regular file, or it's a growing regular file; read it,
19018fd37a7SXin LI growing the buffer as needed. */
19118fd37a7SXin LI
19218fd37a7SXin LI file_block_read (current, current->bufsize - current->buffered);
19318fd37a7SXin LI
19418fd37a7SXin LI if (current->buffered)
19518fd37a7SXin LI {
19618fd37a7SXin LI while (current->buffered == current->bufsize)
19718fd37a7SXin LI {
19818fd37a7SXin LI if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
19918fd37a7SXin LI xalloc_die ();
20018fd37a7SXin LI current->bufsize *= 2;
20118fd37a7SXin LI current->buffer = xrealloc (current->buffer, current->bufsize);
20218fd37a7SXin LI file_block_read (current, current->bufsize - current->buffered);
20318fd37a7SXin LI }
20418fd37a7SXin LI
20518fd37a7SXin LI /* Allocate just enough room for appended newline plus word
20618fd37a7SXin LI sentinel, plus word-alignment. */
20718fd37a7SXin LI cc = current->buffered + 2 * sizeof (word);
20818fd37a7SXin LI current->bufsize = cc - cc % sizeof (word);
20918fd37a7SXin LI current->buffer = xrealloc (current->buffer, current->bufsize);
21018fd37a7SXin LI }
21118fd37a7SXin LI }
21218fd37a7SXin LI
21318fd37a7SXin LI /* Split the file into lines, simultaneously computing the equivalence
21418fd37a7SXin LI class for each line. */
21518fd37a7SXin LI
21618fd37a7SXin LI static void
find_and_hash_each_line(struct file_data * current)21718fd37a7SXin LI find_and_hash_each_line (struct file_data *current)
21818fd37a7SXin LI {
21918fd37a7SXin LI hash_value h;
22018fd37a7SXin LI char const *p = current->prefix_end;
22118fd37a7SXin LI unsigned char c;
22218fd37a7SXin LI lin i, *bucket;
22318fd37a7SXin LI size_t length;
22418fd37a7SXin LI
22518fd37a7SXin LI /* Cache often-used quantities in local variables to help the compiler. */
22618fd37a7SXin LI char const **linbuf = current->linbuf;
22718fd37a7SXin LI lin alloc_lines = current->alloc_lines;
22818fd37a7SXin LI lin line = 0;
22918fd37a7SXin LI lin linbuf_base = current->linbuf_base;
23018fd37a7SXin LI lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
23118fd37a7SXin LI struct equivclass *eqs = equivs;
23218fd37a7SXin LI lin eqs_index = equivs_index;
23318fd37a7SXin LI lin eqs_alloc = equivs_alloc;
23418fd37a7SXin LI char const *suffix_begin = current->suffix_begin;
23518fd37a7SXin LI char const *bufend = FILE_BUFFER (current) + current->buffered;
23618fd37a7SXin LI bool diff_length_compare_anyway =
23718fd37a7SXin LI ignore_white_space != IGNORE_NO_WHITE_SPACE;
23818fd37a7SXin LI bool same_length_diff_contents_compare_anyway =
23918fd37a7SXin LI diff_length_compare_anyway | ignore_case;
24018fd37a7SXin LI
24118fd37a7SXin LI while (p < suffix_begin)
24218fd37a7SXin LI {
24318fd37a7SXin LI char const *ip = p;
24418fd37a7SXin LI
24518fd37a7SXin LI h = 0;
24618fd37a7SXin LI
24718fd37a7SXin LI /* Hash this line until we find a newline. */
24818fd37a7SXin LI if (ignore_case)
24918fd37a7SXin LI switch (ignore_white_space)
25018fd37a7SXin LI {
25118fd37a7SXin LI case IGNORE_ALL_SPACE:
25218fd37a7SXin LI while ((c = *p++) != '\n')
25318fd37a7SXin LI if (! isspace (c))
25418fd37a7SXin LI h = HASH (h, tolower (c));
25518fd37a7SXin LI break;
25618fd37a7SXin LI
25718fd37a7SXin LI case IGNORE_SPACE_CHANGE:
25818fd37a7SXin LI while ((c = *p++) != '\n')
25918fd37a7SXin LI {
26018fd37a7SXin LI if (isspace (c))
26118fd37a7SXin LI {
26218fd37a7SXin LI do
26318fd37a7SXin LI if ((c = *p++) == '\n')
26418fd37a7SXin LI goto hashing_done;
26518fd37a7SXin LI while (isspace (c));
26618fd37a7SXin LI
26718fd37a7SXin LI h = HASH (h, ' ');
26818fd37a7SXin LI }
26918fd37a7SXin LI
27018fd37a7SXin LI /* C is now the first non-space. */
27118fd37a7SXin LI h = HASH (h, tolower (c));
27218fd37a7SXin LI }
27318fd37a7SXin LI break;
27418fd37a7SXin LI
27518fd37a7SXin LI case IGNORE_TAB_EXPANSION:
27618fd37a7SXin LI {
27718fd37a7SXin LI size_t column = 0;
27818fd37a7SXin LI while ((c = *p++) != '\n')
27918fd37a7SXin LI {
28018fd37a7SXin LI size_t repetitions = 1;
28118fd37a7SXin LI
28218fd37a7SXin LI switch (c)
28318fd37a7SXin LI {
28418fd37a7SXin LI case '\b':
28518fd37a7SXin LI column -= 0 < column;
28618fd37a7SXin LI break;
28718fd37a7SXin LI
28818fd37a7SXin LI case '\t':
28918fd37a7SXin LI c = ' ';
29018fd37a7SXin LI repetitions = tabsize - column % tabsize;
29118fd37a7SXin LI column = (column + repetitions < column
29218fd37a7SXin LI ? 0
29318fd37a7SXin LI : column + repetitions);
29418fd37a7SXin LI break;
29518fd37a7SXin LI
29618fd37a7SXin LI case '\r':
29718fd37a7SXin LI column = 0;
29818fd37a7SXin LI break;
29918fd37a7SXin LI
30018fd37a7SXin LI default:
30118fd37a7SXin LI c = tolower (c);
30218fd37a7SXin LI column++;
30318fd37a7SXin LI break;
30418fd37a7SXin LI }
30518fd37a7SXin LI
30618fd37a7SXin LI do
30718fd37a7SXin LI h = HASH (h, c);
30818fd37a7SXin LI while (--repetitions != 0);
30918fd37a7SXin LI }
31018fd37a7SXin LI }
31118fd37a7SXin LI break;
31218fd37a7SXin LI
31318fd37a7SXin LI default:
31418fd37a7SXin LI while ((c = *p++) != '\n')
31518fd37a7SXin LI h = HASH (h, tolower (c));
31618fd37a7SXin LI break;
31718fd37a7SXin LI }
31818fd37a7SXin LI else
31918fd37a7SXin LI switch (ignore_white_space)
32018fd37a7SXin LI {
32118fd37a7SXin LI case IGNORE_ALL_SPACE:
32218fd37a7SXin LI while ((c = *p++) != '\n')
32318fd37a7SXin LI if (! isspace (c))
32418fd37a7SXin LI h = HASH (h, c);
32518fd37a7SXin LI break;
32618fd37a7SXin LI
32718fd37a7SXin LI case IGNORE_SPACE_CHANGE:
32818fd37a7SXin LI while ((c = *p++) != '\n')
32918fd37a7SXin LI {
33018fd37a7SXin LI if (isspace (c))
33118fd37a7SXin LI {
33218fd37a7SXin LI do
33318fd37a7SXin LI if ((c = *p++) == '\n')
33418fd37a7SXin LI goto hashing_done;
33518fd37a7SXin LI while (isspace (c));
33618fd37a7SXin LI
33718fd37a7SXin LI h = HASH (h, ' ');
33818fd37a7SXin LI }
33918fd37a7SXin LI
34018fd37a7SXin LI /* C is now the first non-space. */
34118fd37a7SXin LI h = HASH (h, c);
34218fd37a7SXin LI }
34318fd37a7SXin LI break;
34418fd37a7SXin LI
34518fd37a7SXin LI case IGNORE_TAB_EXPANSION:
34618fd37a7SXin LI {
34718fd37a7SXin LI size_t column = 0;
34818fd37a7SXin LI while ((c = *p++) != '\n')
34918fd37a7SXin LI {
35018fd37a7SXin LI size_t repetitions = 1;
35118fd37a7SXin LI
35218fd37a7SXin LI switch (c)
35318fd37a7SXin LI {
35418fd37a7SXin LI case '\b':
35518fd37a7SXin LI column -= 0 < column;
35618fd37a7SXin LI break;
35718fd37a7SXin LI
35818fd37a7SXin LI case '\t':
35918fd37a7SXin LI c = ' ';
36018fd37a7SXin LI repetitions = tabsize - column % tabsize;
36118fd37a7SXin LI column = (column + repetitions < column
36218fd37a7SXin LI ? 0
36318fd37a7SXin LI : column + repetitions);
36418fd37a7SXin LI break;
36518fd37a7SXin LI
36618fd37a7SXin LI case '\r':
36718fd37a7SXin LI column = 0;
36818fd37a7SXin LI break;
36918fd37a7SXin LI
37018fd37a7SXin LI default:
37118fd37a7SXin LI column++;
37218fd37a7SXin LI break;
37318fd37a7SXin LI }
37418fd37a7SXin LI
37518fd37a7SXin LI do
37618fd37a7SXin LI h = HASH (h, c);
37718fd37a7SXin LI while (--repetitions != 0);
37818fd37a7SXin LI }
37918fd37a7SXin LI }
38018fd37a7SXin LI break;
38118fd37a7SXin LI
38218fd37a7SXin LI default:
38318fd37a7SXin LI while ((c = *p++) != '\n')
38418fd37a7SXin LI h = HASH (h, c);
38518fd37a7SXin LI break;
38618fd37a7SXin LI }
38718fd37a7SXin LI
38818fd37a7SXin LI hashing_done:;
38918fd37a7SXin LI
39018fd37a7SXin LI bucket = &buckets[h % nbuckets];
39118fd37a7SXin LI length = p - ip - 1;
39218fd37a7SXin LI
39318fd37a7SXin LI if (p == bufend
39418fd37a7SXin LI && current->missing_newline
39518fd37a7SXin LI && ROBUST_OUTPUT_STYLE (output_style))
39618fd37a7SXin LI {
39718fd37a7SXin LI /* This line is incomplete. If this is significant,
39818fd37a7SXin LI put the line into buckets[-1]. */
39918fd37a7SXin LI if (ignore_white_space < IGNORE_SPACE_CHANGE)
40018fd37a7SXin LI bucket = &buckets[-1];
40118fd37a7SXin LI
40218fd37a7SXin LI /* Omit the inserted newline when computing linbuf later. */
40318fd37a7SXin LI p--;
40418fd37a7SXin LI bufend = suffix_begin = p;
40518fd37a7SXin LI }
40618fd37a7SXin LI
40718fd37a7SXin LI for (i = *bucket; ; i = eqs[i].next)
40818fd37a7SXin LI if (!i)
40918fd37a7SXin LI {
41018fd37a7SXin LI /* Create a new equivalence class in this bucket. */
41118fd37a7SXin LI i = eqs_index++;
41218fd37a7SXin LI if (i == eqs_alloc)
41318fd37a7SXin LI {
41418fd37a7SXin LI if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
41518fd37a7SXin LI xalloc_die ();
41618fd37a7SXin LI eqs_alloc *= 2;
41718fd37a7SXin LI eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
41818fd37a7SXin LI }
41918fd37a7SXin LI eqs[i].next = *bucket;
42018fd37a7SXin LI eqs[i].hash = h;
42118fd37a7SXin LI eqs[i].line = ip;
42218fd37a7SXin LI eqs[i].length = length;
42318fd37a7SXin LI *bucket = i;
42418fd37a7SXin LI break;
42518fd37a7SXin LI }
42618fd37a7SXin LI else if (eqs[i].hash == h)
42718fd37a7SXin LI {
42818fd37a7SXin LI char const *eqline = eqs[i].line;
42918fd37a7SXin LI
43018fd37a7SXin LI /* Reuse existing class if lines_differ reports the lines
43118fd37a7SXin LI equal. */
43218fd37a7SXin LI if (eqs[i].length == length)
43318fd37a7SXin LI {
43418fd37a7SXin LI /* Reuse existing equivalence class if the lines are identical.
43518fd37a7SXin LI This detects the common case of exact identity
43618fd37a7SXin LI faster than lines_differ would. */
43718fd37a7SXin LI if (memcmp (eqline, ip, length) == 0)
43818fd37a7SXin LI break;
43918fd37a7SXin LI if (!same_length_diff_contents_compare_anyway)
44018fd37a7SXin LI continue;
44118fd37a7SXin LI }
44218fd37a7SXin LI else if (!diff_length_compare_anyway)
44318fd37a7SXin LI continue;
44418fd37a7SXin LI
44518fd37a7SXin LI if (! lines_differ (eqline, ip))
44618fd37a7SXin LI break;
44718fd37a7SXin LI }
44818fd37a7SXin LI
44918fd37a7SXin LI /* Maybe increase the size of the line table. */
45018fd37a7SXin LI if (line == alloc_lines)
45118fd37a7SXin LI {
45218fd37a7SXin LI /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
45318fd37a7SXin LI if (PTRDIFF_MAX / 3 <= alloc_lines
45418fd37a7SXin LI || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
45518fd37a7SXin LI || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
45618fd37a7SXin LI xalloc_die ();
45718fd37a7SXin LI alloc_lines = 2 * alloc_lines - linbuf_base;
45818fd37a7SXin LI cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
45918fd37a7SXin LI linbuf += linbuf_base;
46018fd37a7SXin LI linbuf = xrealloc (linbuf,
46118fd37a7SXin LI (alloc_lines - linbuf_base) * sizeof *linbuf);
46218fd37a7SXin LI linbuf -= linbuf_base;
46318fd37a7SXin LI }
46418fd37a7SXin LI linbuf[line] = ip;
46518fd37a7SXin LI cureqs[line] = i;
46618fd37a7SXin LI ++line;
46718fd37a7SXin LI }
46818fd37a7SXin LI
46918fd37a7SXin LI current->buffered_lines = line;
47018fd37a7SXin LI
47118fd37a7SXin LI for (i = 0; ; i++)
47218fd37a7SXin LI {
47318fd37a7SXin LI /* Record the line start for lines in the suffix that we care about.
47418fd37a7SXin LI Record one more line start than lines,
47518fd37a7SXin LI so that we can compute the length of any buffered line. */
47618fd37a7SXin LI if (line == alloc_lines)
47718fd37a7SXin LI {
47818fd37a7SXin LI /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
47918fd37a7SXin LI if (PTRDIFF_MAX / 3 <= alloc_lines
48018fd37a7SXin LI || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
48118fd37a7SXin LI || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
48218fd37a7SXin LI xalloc_die ();
48318fd37a7SXin LI alloc_lines = 2 * alloc_lines - linbuf_base;
48418fd37a7SXin LI linbuf += linbuf_base;
48518fd37a7SXin LI linbuf = xrealloc (linbuf,
48618fd37a7SXin LI (alloc_lines - linbuf_base) * sizeof *linbuf);
48718fd37a7SXin LI linbuf -= linbuf_base;
48818fd37a7SXin LI }
48918fd37a7SXin LI linbuf[line] = p;
49018fd37a7SXin LI
49118fd37a7SXin LI if (p == bufend)
49218fd37a7SXin LI break;
49318fd37a7SXin LI
49418fd37a7SXin LI if (context <= i && no_diff_means_no_output)
49518fd37a7SXin LI break;
49618fd37a7SXin LI
49718fd37a7SXin LI line++;
49818fd37a7SXin LI
49918fd37a7SXin LI while (*p++ != '\n')
50018fd37a7SXin LI continue;
50118fd37a7SXin LI }
50218fd37a7SXin LI
50318fd37a7SXin LI /* Done with cache in local variables. */
50418fd37a7SXin LI current->linbuf = linbuf;
50518fd37a7SXin LI current->valid_lines = line;
50618fd37a7SXin LI current->alloc_lines = alloc_lines;
50718fd37a7SXin LI current->equivs = cureqs;
50818fd37a7SXin LI equivs = eqs;
50918fd37a7SXin LI equivs_alloc = eqs_alloc;
51018fd37a7SXin LI equivs_index = eqs_index;
51118fd37a7SXin LI }
51218fd37a7SXin LI
51318fd37a7SXin LI /* Prepare the text. Make sure the text end is initialized.
51418fd37a7SXin LI Make sure text ends in a newline,
51518fd37a7SXin LI but remember that we had to add one.
51618fd37a7SXin LI Strip trailing CRs, if that was requested. */
51718fd37a7SXin LI
51818fd37a7SXin LI static void
prepare_text(struct file_data * current)51918fd37a7SXin LI prepare_text (struct file_data *current)
52018fd37a7SXin LI {
52118fd37a7SXin LI size_t buffered = current->buffered;
52218fd37a7SXin LI char *p = FILE_BUFFER (current);
52318fd37a7SXin LI char *dst;
52418fd37a7SXin LI
52518fd37a7SXin LI if (buffered == 0 || p[buffered - 1] == '\n')
52618fd37a7SXin LI current->missing_newline = false;
52718fd37a7SXin LI else
52818fd37a7SXin LI {
52918fd37a7SXin LI p[buffered++] = '\n';
53018fd37a7SXin LI current->missing_newline = true;
53118fd37a7SXin LI }
53218fd37a7SXin LI
53318fd37a7SXin LI if (!p)
53418fd37a7SXin LI return;
53518fd37a7SXin LI
53618fd37a7SXin LI /* Don't use uninitialized storage when planting or using sentinels. */
53718fd37a7SXin LI memset (p + buffered, 0, sizeof (word));
53818fd37a7SXin LI
53918fd37a7SXin LI if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
54018fd37a7SXin LI {
54118fd37a7SXin LI char const *src = dst;
54218fd37a7SXin LI char const *srclim = p + buffered;
54318fd37a7SXin LI
54418fd37a7SXin LI do
54518fd37a7SXin LI dst += ! ((*dst = *src++) == '\r' && *src == '\n');
54618fd37a7SXin LI while (src < srclim);
54718fd37a7SXin LI
54818fd37a7SXin LI buffered -= src - dst;
54918fd37a7SXin LI }
55018fd37a7SXin LI
55118fd37a7SXin LI current->buffered = buffered;
55218fd37a7SXin LI }
55318fd37a7SXin LI
55418fd37a7SXin LI /* We have found N lines in a buffer of size S; guess the
55518fd37a7SXin LI proportionate number of lines that will be found in a buffer of
55618fd37a7SXin LI size T. However, do not guess a number of lines so large that the
55718fd37a7SXin LI resulting line table might cause overflow in size calculations. */
55818fd37a7SXin LI static lin
guess_lines(lin n,size_t s,size_t t)55918fd37a7SXin LI guess_lines (lin n, size_t s, size_t t)
56018fd37a7SXin LI {
56118fd37a7SXin LI size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
56218fd37a7SXin LI lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
56318fd37a7SXin LI return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
56418fd37a7SXin LI }
56518fd37a7SXin LI
56618fd37a7SXin LI /* Given a vector of two file_data objects, find the identical
56718fd37a7SXin LI prefixes and suffixes of each object. */
56818fd37a7SXin LI
56918fd37a7SXin LI static void
find_identical_ends(struct file_data filevec[])57018fd37a7SXin LI find_identical_ends (struct file_data filevec[])
57118fd37a7SXin LI {
57218fd37a7SXin LI word *w0, *w1;
57318fd37a7SXin LI char *p0, *p1, *buffer0, *buffer1;
57418fd37a7SXin LI char const *end0, *beg0;
57518fd37a7SXin LI char const **linbuf0, **linbuf1;
57618fd37a7SXin LI lin i, lines;
57718fd37a7SXin LI size_t n0, n1;
57818fd37a7SXin LI lin alloc_lines0, alloc_lines1;
57918fd37a7SXin LI lin buffered_prefix, prefix_count, prefix_mask;
58018fd37a7SXin LI lin middle_guess, suffix_guess;
58118fd37a7SXin LI
58218fd37a7SXin LI slurp (&filevec[0]);
58318fd37a7SXin LI prepare_text (&filevec[0]);
58418fd37a7SXin LI if (filevec[0].desc != filevec[1].desc)
58518fd37a7SXin LI {
58618fd37a7SXin LI slurp (&filevec[1]);
58718fd37a7SXin LI prepare_text (&filevec[1]);
58818fd37a7SXin LI }
58918fd37a7SXin LI else
59018fd37a7SXin LI {
59118fd37a7SXin LI filevec[1].buffer = filevec[0].buffer;
59218fd37a7SXin LI filevec[1].bufsize = filevec[0].bufsize;
59318fd37a7SXin LI filevec[1].buffered = filevec[0].buffered;
59418fd37a7SXin LI filevec[1].missing_newline = filevec[0].missing_newline;
59518fd37a7SXin LI }
59618fd37a7SXin LI
59718fd37a7SXin LI /* Find identical prefix. */
59818fd37a7SXin LI
59918fd37a7SXin LI w0 = filevec[0].buffer;
60018fd37a7SXin LI w1 = filevec[1].buffer;
60118fd37a7SXin LI p0 = buffer0 = (char *) w0;
60218fd37a7SXin LI p1 = buffer1 = (char *) w1;
60318fd37a7SXin LI n0 = filevec[0].buffered;
60418fd37a7SXin LI n1 = filevec[1].buffered;
60518fd37a7SXin LI
60618fd37a7SXin LI if (p0 == p1)
60718fd37a7SXin LI /* The buffers are the same; sentinels won't work. */
60818fd37a7SXin LI p0 = p1 += n1;
60918fd37a7SXin LI else
61018fd37a7SXin LI {
61118fd37a7SXin LI /* Insert end sentinels, in this case characters that are guaranteed
61218fd37a7SXin LI to make the equality test false, and thus terminate the loop. */
61318fd37a7SXin LI
61418fd37a7SXin LI if (n0 < n1)
61518fd37a7SXin LI p0[n0] = ~p1[n0];
61618fd37a7SXin LI else
61718fd37a7SXin LI p1[n1] = ~p0[n1];
61818fd37a7SXin LI
61918fd37a7SXin LI /* Loop until first mismatch, or to the sentinel characters. */
62018fd37a7SXin LI
62118fd37a7SXin LI /* Compare a word at a time for speed. */
62218fd37a7SXin LI while (*w0 == *w1)
62318fd37a7SXin LI w0++, w1++;
62418fd37a7SXin LI
62518fd37a7SXin LI /* Do the last few bytes of comparison a byte at a time. */
62618fd37a7SXin LI p0 = (char *) w0;
62718fd37a7SXin LI p1 = (char *) w1;
62818fd37a7SXin LI while (*p0 == *p1)
62918fd37a7SXin LI p0++, p1++;
63018fd37a7SXin LI
63118fd37a7SXin LI /* Don't mistakenly count missing newline as part of prefix. */
63218fd37a7SXin LI if (ROBUST_OUTPUT_STYLE (output_style)
63318fd37a7SXin LI && ((buffer0 + n0 - filevec[0].missing_newline < p0)
63418fd37a7SXin LI !=
63518fd37a7SXin LI (buffer1 + n1 - filevec[1].missing_newline < p1)))
63618fd37a7SXin LI p0--, p1--;
63718fd37a7SXin LI }
63818fd37a7SXin LI
63918fd37a7SXin LI /* Now P0 and P1 point at the first nonmatching characters. */
64018fd37a7SXin LI
64118fd37a7SXin LI /* Skip back to last line-beginning in the prefix,
64218fd37a7SXin LI and then discard up to HORIZON_LINES lines from the prefix. */
64318fd37a7SXin LI i = horizon_lines;
64418fd37a7SXin LI while (p0 != buffer0 && (p0[-1] != '\n' || i--))
64518fd37a7SXin LI p0--, p1--;
64618fd37a7SXin LI
64718fd37a7SXin LI /* Record the prefix. */
64818fd37a7SXin LI filevec[0].prefix_end = p0;
64918fd37a7SXin LI filevec[1].prefix_end = p1;
65018fd37a7SXin LI
65118fd37a7SXin LI /* Find identical suffix. */
65218fd37a7SXin LI
65318fd37a7SXin LI /* P0 and P1 point beyond the last chars not yet compared. */
65418fd37a7SXin LI p0 = buffer0 + n0;
65518fd37a7SXin LI p1 = buffer1 + n1;
65618fd37a7SXin LI
65718fd37a7SXin LI if (! ROBUST_OUTPUT_STYLE (output_style)
65818fd37a7SXin LI || filevec[0].missing_newline == filevec[1].missing_newline)
65918fd37a7SXin LI {
66018fd37a7SXin LI end0 = p0; /* Addr of last char in file 0. */
66118fd37a7SXin LI
66218fd37a7SXin LI /* Get value of P0 at which we should stop scanning backward:
66318fd37a7SXin LI this is when either P0 or P1 points just past the last char
66418fd37a7SXin LI of the identical prefix. */
66518fd37a7SXin LI beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
66618fd37a7SXin LI
66718fd37a7SXin LI /* Scan back until chars don't match or we reach that point. */
66818fd37a7SXin LI for (; p0 != beg0; p0--, p1--)
66918fd37a7SXin LI if (*p0 != *p1)
67018fd37a7SXin LI {
67118fd37a7SXin LI /* Point at the first char of the matching suffix. */
67218fd37a7SXin LI beg0 = p0;
67318fd37a7SXin LI break;
67418fd37a7SXin LI }
67518fd37a7SXin LI
67618fd37a7SXin LI /* Are we at a line-beginning in both files? If not, add the rest of
67718fd37a7SXin LI this line to the main body. Discard up to HORIZON_LINES lines from
67818fd37a7SXin LI the identical suffix. Also, discard one extra line,
67918fd37a7SXin LI because shift_boundaries may need it. */
68018fd37a7SXin LI i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
68118fd37a7SXin LI &&
68218fd37a7SXin LI (buffer1 == p1 || p1[-1] == '\n'));
68318fd37a7SXin LI while (i-- && p0 != end0)
68418fd37a7SXin LI while (*p0++ != '\n')
68518fd37a7SXin LI continue;
68618fd37a7SXin LI
68718fd37a7SXin LI p1 += p0 - beg0;
68818fd37a7SXin LI }
68918fd37a7SXin LI
69018fd37a7SXin LI /* Record the suffix. */
69118fd37a7SXin LI filevec[0].suffix_begin = p0;
69218fd37a7SXin LI filevec[1].suffix_begin = p1;
69318fd37a7SXin LI
69418fd37a7SXin LI /* Calculate number of lines of prefix to save.
69518fd37a7SXin LI
69618fd37a7SXin LI prefix_count == 0 means save the whole prefix;
69718fd37a7SXin LI we need this for options like -D that output the whole file,
69818fd37a7SXin LI or for enormous contexts (to avoid worrying about arithmetic overflow).
69918fd37a7SXin LI We also need it for options like -F that output some preceding line;
70018fd37a7SXin LI at least we will need to find the last few lines,
70118fd37a7SXin LI but since we don't know how many, it's easiest to find them all.
70218fd37a7SXin LI
70318fd37a7SXin LI Otherwise, prefix_count != 0. Save just prefix_count lines at start
70418fd37a7SXin LI of the line buffer; they'll be moved to the proper location later.
70518fd37a7SXin LI Handle 1 more line than the context says (because we count 1 too many),
70618fd37a7SXin LI rounded up to the next power of 2 to speed index computation. */
70718fd37a7SXin LI
70818fd37a7SXin LI if (no_diff_means_no_output && ! function_regexp.fastmap
70918fd37a7SXin LI && context < LIN_MAX / 4 && context < n0)
71018fd37a7SXin LI {
71118fd37a7SXin LI middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
71218fd37a7SXin LI suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
71318fd37a7SXin LI for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
71418fd37a7SXin LI continue;
71518fd37a7SXin LI alloc_lines0 = (prefix_count + middle_guess
71618fd37a7SXin LI + MIN (context, suffix_guess));
71718fd37a7SXin LI }
71818fd37a7SXin LI else
71918fd37a7SXin LI {
72018fd37a7SXin LI prefix_count = 0;
72118fd37a7SXin LI alloc_lines0 = guess_lines (0, 0, n0);
72218fd37a7SXin LI }
72318fd37a7SXin LI
72418fd37a7SXin LI prefix_mask = prefix_count - 1;
72518fd37a7SXin LI lines = 0;
72618fd37a7SXin LI linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
72718fd37a7SXin LI p0 = buffer0;
72818fd37a7SXin LI
72918fd37a7SXin LI /* If the prefix is needed, find the prefix lines. */
73018fd37a7SXin LI if (! (no_diff_means_no_output
73118fd37a7SXin LI && filevec[0].prefix_end == p0
73218fd37a7SXin LI && filevec[1].prefix_end == p1))
73318fd37a7SXin LI {
73418fd37a7SXin LI end0 = filevec[0].prefix_end;
73518fd37a7SXin LI while (p0 != end0)
73618fd37a7SXin LI {
73718fd37a7SXin LI lin l = lines++ & prefix_mask;
73818fd37a7SXin LI if (l == alloc_lines0)
73918fd37a7SXin LI {
74018fd37a7SXin LI if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
74118fd37a7SXin LI xalloc_die ();
74218fd37a7SXin LI alloc_lines0 *= 2;
74318fd37a7SXin LI linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
74418fd37a7SXin LI }
74518fd37a7SXin LI linbuf0[l] = p0;
74618fd37a7SXin LI while (*p0++ != '\n')
74718fd37a7SXin LI continue;
74818fd37a7SXin LI }
74918fd37a7SXin LI }
75018fd37a7SXin LI buffered_prefix = prefix_count && context < lines ? context : lines;
75118fd37a7SXin LI
75218fd37a7SXin LI /* Allocate line buffer 1. */
75318fd37a7SXin LI
75418fd37a7SXin LI middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
75518fd37a7SXin LI suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
75618fd37a7SXin LI alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
75718fd37a7SXin LI if (alloc_lines1 < buffered_prefix
75818fd37a7SXin LI || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
75918fd37a7SXin LI xalloc_die ();
76018fd37a7SXin LI linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
76118fd37a7SXin LI
76218fd37a7SXin LI if (buffered_prefix != lines)
76318fd37a7SXin LI {
76418fd37a7SXin LI /* Rotate prefix lines to proper location. */
76518fd37a7SXin LI for (i = 0; i < buffered_prefix; i++)
76618fd37a7SXin LI linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
76718fd37a7SXin LI for (i = 0; i < buffered_prefix; i++)
76818fd37a7SXin LI linbuf0[i] = linbuf1[i];
76918fd37a7SXin LI }
77018fd37a7SXin LI
77118fd37a7SXin LI /* Initialize line buffer 1 from line buffer 0. */
77218fd37a7SXin LI for (i = 0; i < buffered_prefix; i++)
77318fd37a7SXin LI linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
77418fd37a7SXin LI
77518fd37a7SXin LI /* Record the line buffer, adjusted so that
77618fd37a7SXin LI linbuf[0] points at the first differing line. */
77718fd37a7SXin LI filevec[0].linbuf = linbuf0 + buffered_prefix;
77818fd37a7SXin LI filevec[1].linbuf = linbuf1 + buffered_prefix;
77918fd37a7SXin LI filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
78018fd37a7SXin LI filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
78118fd37a7SXin LI filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
78218fd37a7SXin LI filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
78318fd37a7SXin LI }
78418fd37a7SXin LI
78518fd37a7SXin LI /* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
78618fd37a7SXin LI than 2**k. This table is derived from Chris K. Caldwell's list
78718fd37a7SXin LI <http://www.utm.edu/research/primes/lists/2small/>. */
78818fd37a7SXin LI
78918fd37a7SXin LI static unsigned char const prime_offset[] =
79018fd37a7SXin LI {
79118fd37a7SXin LI 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
79218fd37a7SXin LI 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
79318fd37a7SXin LI 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
79418fd37a7SXin LI 55, 93, 1, 57, 25
79518fd37a7SXin LI };
79618fd37a7SXin LI
79718fd37a7SXin LI /* Verify that this host's size_t is not too wide for the above table. */
79818fd37a7SXin LI
79918fd37a7SXin LI verify (enough_prime_offsets,
80018fd37a7SXin LI sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
80118fd37a7SXin LI
80218fd37a7SXin LI /* Given a vector of two file_data objects, read the file associated
80318fd37a7SXin LI with each one, and build the table of equivalence classes.
80418fd37a7SXin LI Return nonzero if either file appears to be a binary file.
80518fd37a7SXin LI If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
80618fd37a7SXin LI
80718fd37a7SXin LI bool
read_files(struct file_data filevec[],bool pretend_binary)80818fd37a7SXin LI read_files (struct file_data filevec[], bool pretend_binary)
80918fd37a7SXin LI {
81018fd37a7SXin LI int i;
81118fd37a7SXin LI bool skip_test = text | pretend_binary;
81218fd37a7SXin LI bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
81318fd37a7SXin LI
81418fd37a7SXin LI if (filevec[0].desc != filevec[1].desc)
81518fd37a7SXin LI appears_binary |= sip (&filevec[1], skip_test | appears_binary);
81618fd37a7SXin LI else
81718fd37a7SXin LI {
81818fd37a7SXin LI filevec[1].buffer = filevec[0].buffer;
81918fd37a7SXin LI filevec[1].bufsize = filevec[0].bufsize;
82018fd37a7SXin LI filevec[1].buffered = filevec[0].buffered;
82118fd37a7SXin LI }
82218fd37a7SXin LI if (appears_binary)
82318fd37a7SXin LI {
82418fd37a7SXin LI set_binary_mode (filevec[0].desc, true);
82518fd37a7SXin LI set_binary_mode (filevec[1].desc, true);
82618fd37a7SXin LI return true;
82718fd37a7SXin LI }
82818fd37a7SXin LI
82918fd37a7SXin LI find_identical_ends (filevec);
83018fd37a7SXin LI
83118fd37a7SXin LI equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
83218fd37a7SXin LI if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
83318fd37a7SXin LI xalloc_die ();
83418fd37a7SXin LI equivs = xmalloc (equivs_alloc * sizeof *equivs);
83518fd37a7SXin LI /* Equivalence class 0 is permanently safe for lines that were not
83618fd37a7SXin LI hashed. Real equivalence classes start at 1. */
83718fd37a7SXin LI equivs_index = 1;
83818fd37a7SXin LI
83918fd37a7SXin LI /* Allocate (one plus) a prime number of hash buckets. Use a prime
84018fd37a7SXin LI number between 1/3 and 2/3 of the value of equiv_allocs,
84118fd37a7SXin LI approximately. */
84218fd37a7SXin LI for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
84318fd37a7SXin LI continue;
84418fd37a7SXin LI nbuckets = ((size_t) 1 << i) - prime_offset[i];
84518fd37a7SXin LI if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
84618fd37a7SXin LI xalloc_die ();
84718fd37a7SXin LI buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
84818fd37a7SXin LI buckets++;
84918fd37a7SXin LI
85018fd37a7SXin LI for (i = 0; i < 2; i++)
85118fd37a7SXin LI find_and_hash_each_line (&filevec[i]);
85218fd37a7SXin LI
85318fd37a7SXin LI filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
85418fd37a7SXin LI
85518fd37a7SXin LI free (equivs);
85618fd37a7SXin LI free (buckets - 1);
85718fd37a7SXin LI
85818fd37a7SXin LI return false;
85918fd37a7SXin LI }
860