diff/src/io.c

/* File I/O for GNU DIFF.

   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
   2004 Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18fd37a7SXin LI
18fd37a7SXin LI#include "diff.h"
18fd37a7SXin LI#include <cmpbuf.h>
18fd37a7SXin LI#include <file-type.h>
18fd37a7SXin LI#include <setmode.h>
18fd37a7SXin LI#include <xalloc.h>
18fd37a7SXin LI
/* ROL (V, N) yields the unsigned value V rotated left by N bits.
   N must be greater than zero and less than the width of V, since
   both shift counts have to stay within the width of the type.  */
#define ROL(v, n) ((v) >> (sizeof (v) * CHAR_BIT - (n)) | (v) << (n))

/* HASH (H, C) yields the hash value that results from extending the
   running hash H with one more character C.  */
#define HASH(h, c) (ROL (h, 7) + (c))
18fd37a7SXin LI
18fd37a7SXin LI/* The type of a hash value.  */
18fd37a7SXin LItypedef size_t hash_value;
18fd37a7SXin LIverify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
18fd37a7SXin LI
18fd37a7SXin LI/* Lines are put into equivalence classes of lines that match in lines_differ.
18fd37a7SXin LI   Each equivalence class is represented by one of these structures,
18fd37a7SXin LI   but only while the classes are being computed.
18fd37a7SXin LI   Afterward, each class is represented by a number.  */
18fd37a7SXin LIstruct equivclass
18fd37a7SXin LI{
18fd37a7SXin LI  lin next;		/* Next item in this bucket.  */
18fd37a7SXin LI  hash_value hash;	/* Hash of lines in this class.  */
18fd37a7SXin LI  char const *line;	/* A line that fits this class.  */
18fd37a7SXin LI  size_t length;	/* That line's length, not counting its newline.  */
18fd37a7SXin LI};
18fd37a7SXin LI
18fd37a7SXin LI/* Hash-table: array of buckets, each being a chain of equivalence classes.
18fd37a7SXin LI   buckets[-1] is reserved for incomplete lines.  */
18fd37a7SXin LIstatic lin *buckets;
18fd37a7SXin LI
18fd37a7SXin LI/* Number of buckets in the hash table array, not counting buckets[-1].  */
18fd37a7SXin LIstatic size_t nbuckets;
18fd37a7SXin LI
18fd37a7SXin LI/* Array in which the equivalence classes are allocated.
18fd37a7SXin LI   The bucket-chains go through the elements in this array.
18fd37a7SXin LI   The number of an equivalence class is its index in this array.  */
18fd37a7SXin LIstatic struct equivclass *equivs;
18fd37a7SXin LI
18fd37a7SXin LI/* Index of first free element in the array `equivs'.  */
18fd37a7SXin LIstatic lin equivs_index;
18fd37a7SXin LI
18fd37a7SXin LI/* Number of elements allocated in the array `equivs'.  */
18fd37a7SXin LIstatic lin equivs_alloc;
18fd37a7SXin LI
18fd37a7SXin LI/* Read a block of data into a file buffer, checking for EOF and error.  */
18fd37a7SXin LI
18fd37a7SXin LIvoid
18fd37a7SXin LIfile_block_read (struct file_data *current, size_t size)
18fd37a7SXin LI{
18fd37a7SXin LI  if (size && ! current->eof)
18fd37a7SXin LI    {
18fd37a7SXin LI      size_t s = block_read (current->desc,
18fd37a7SXin LI			     FILE_BUFFER (current) + current->buffered, size);
18fd37a7SXin LI      if (s == SIZE_MAX)
18fd37a7SXin LI	pfatal_with_name (current->name);
18fd37a7SXin LI      current->buffered += s;
18fd37a7SXin LI      current->eof = s < size;
18fd37a7SXin LI    }
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* Check for binary files and compare them for exact identity.  */
18fd37a7SXin LI
/* Nonzero if a NUL byte occurs among the first SIZE characters of BUF;
   such a byte is taken as the mark of a non-text file.  */

#define binary_file_p(buf, size) (memchr (buf, '\0', size) != NULL)
18fd37a7SXin LI
18fd37a7SXin LI/* Get ready to read the current file.
18fd37a7SXin LI   Return nonzero if SKIP_TEST is zero,
18fd37a7SXin LI   and if it appears to be a binary file.  */
18fd37a7SXin LI
18fd37a7SXin LIstatic bool
18fd37a7SXin LIsip (struct file_data *current, bool skip_test)
18fd37a7SXin LI{
18fd37a7SXin LI  /* If we have a nonexistent file at this stage, treat it as empty.  */
18fd37a7SXin LI  if (current->desc < 0)
18fd37a7SXin LI    {
18fd37a7SXin LI      /* Leave room for a sentinel.  */
18fd37a7SXin LI      current->bufsize = sizeof (word);
18fd37a7SXin LI      current->buffer = xmalloc (current->bufsize);
18fd37a7SXin LI    }
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      current->bufsize = buffer_lcm (sizeof (word),
18fd37a7SXin LI				     STAT_BLOCKSIZE (current->stat),
18fd37a7SXin LI				     PTRDIFF_MAX - 2 * sizeof (word));
18fd37a7SXin LI      current->buffer = xmalloc (current->bufsize);
18fd37a7SXin LI
18fd37a7SXin LI      if (! skip_test)
18fd37a7SXin LI	{
18fd37a7SXin LI	  /* Check first part of file to see if it's a binary file.  */
18fd37a7SXin LI
18fd37a7SXin LI	  bool was_binary = set_binary_mode (current->desc, true);
18fd37a7SXin LI	  off_t buffered;
18fd37a7SXin LI	  file_block_read (current, current->bufsize);
18fd37a7SXin LI	  buffered = current->buffered;
18fd37a7SXin LI
18fd37a7SXin LI	  if (! was_binary)
18fd37a7SXin LI	    {
18fd37a7SXin LI	      /* Revert to text mode and seek back to the beginning to
18fd37a7SXin LI		 reread the file.  Use relative seek, since file
18fd37a7SXin LI		 descriptors like stdin might not start at offset
18fd37a7SXin LI		 zero.  */
18fd37a7SXin LI
18fd37a7SXin LI	      if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
18fd37a7SXin LI		pfatal_with_name (current->name);
18fd37a7SXin LI	      set_binary_mode (current->desc, false);
18fd37a7SXin LI	      current->buffered = 0;
18fd37a7SXin LI	      current->eof = false;
18fd37a7SXin LI	    }
18fd37a7SXin LI
18fd37a7SXin LI	  return binary_file_p (current->buffer, buffered);
18fd37a7SXin LI	}
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  current->buffered = 0;
18fd37a7SXin LI  current->eof = false;
18fd37a7SXin LI  return false;
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* Slurp the rest of the current file completely into memory.  */
18fd37a7SXin LI
18fd37a7SXin LIstatic void
18fd37a7SXin LIslurp (struct file_data *current)
18fd37a7SXin LI{
18fd37a7SXin LI  size_t cc;
18fd37a7SXin LI
18fd37a7SXin LI  if (current->desc < 0)
18fd37a7SXin LI    {
18fd37a7SXin LI      /* The file is nonexistent.  */
18fd37a7SXin LI      return;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  if (S_ISREG (current->stat.st_mode))
18fd37a7SXin LI    {
18fd37a7SXin LI      /* It's a regular file; slurp in the rest all at once.  */
18fd37a7SXin LI
18fd37a7SXin LI      /* Get the size out of the stat block.
18fd37a7SXin LI	 Allocate just enough room for appended newline plus word sentinel,
18fd37a7SXin LI	 plus word-alignment since we want the buffer word-aligned.  */
18fd37a7SXin LI      size_t file_size = current->stat.st_size;
18fd37a7SXin LI      cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
18fd37a7SXin LI      if (file_size != current->stat.st_size || cc < file_size
18fd37a7SXin LI	  || PTRDIFF_MAX <= cc)
18fd37a7SXin LI	xalloc_die ();
18fd37a7SXin LI
18fd37a7SXin LI      if (current->bufsize < cc)
18fd37a7SXin LI	{
18fd37a7SXin LI	  current->bufsize = cc;
18fd37a7SXin LI	  current->buffer = xrealloc (current->buffer, cc);
18fd37a7SXin LI	}
18fd37a7SXin LI
18fd37a7SXin LI      /* Try to read at least 1 more byte than the size indicates, to
18fd37a7SXin LI	 detect whether the file is growing.  This is a nicety for
18fd37a7SXin LI	 users who run 'diff' on files while they are changing.  */
18fd37a7SXin LI
18fd37a7SXin LI      if (current->buffered <= file_size)
18fd37a7SXin LI	{
18fd37a7SXin LI	  file_block_read (current, file_size + 1 - current->buffered);
18fd37a7SXin LI	  if (current->buffered <= file_size)
18fd37a7SXin LI	    return;
18fd37a7SXin LI	}
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* It's not a regular file, or it's a growing regular file; read it,
18fd37a7SXin LI     growing the buffer as needed.  */
18fd37a7SXin LI
18fd37a7SXin LI  file_block_read (current, current->bufsize - current->buffered);
18fd37a7SXin LI
18fd37a7SXin LI  if (current->buffered)
18fd37a7SXin LI    {
18fd37a7SXin LI      while (current->buffered == current->bufsize)
18fd37a7SXin LI	{
18fd37a7SXin LI	  if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
18fd37a7SXin LI	    xalloc_die ();
18fd37a7SXin LI	  current->bufsize *= 2;
18fd37a7SXin LI	  current->buffer = xrealloc (current->buffer, current->bufsize);
18fd37a7SXin LI	  file_block_read (current, current->bufsize - current->buffered);
18fd37a7SXin LI	}
18fd37a7SXin LI
18fd37a7SXin LI      /* Allocate just enough room for appended newline plus word
18fd37a7SXin LI	 sentinel, plus word-alignment.  */
18fd37a7SXin LI      cc = current->buffered + 2 * sizeof (word);
18fd37a7SXin LI      current->bufsize = cc - cc % sizeof (word);
18fd37a7SXin LI      current->buffer = xrealloc (current->buffer, current->bufsize);
18fd37a7SXin LI    }
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* Split the file into lines, simultaneously computing the equivalence
18fd37a7SXin LI   class for each line.  */
18fd37a7SXin LI
18fd37a7SXin LIstatic void
18fd37a7SXin LIfind_and_hash_each_line (struct file_data *current)
18fd37a7SXin LI{
18fd37a7SXin LI  hash_value h;
18fd37a7SXin LI  char const *p = current->prefix_end;
18fd37a7SXin LI  unsigned char c;
18fd37a7SXin LI  lin i, *bucket;
18fd37a7SXin LI  size_t length;
18fd37a7SXin LI
18fd37a7SXin LI  /* Cache often-used quantities in local variables to help the compiler.  */
18fd37a7SXin LI  char const **linbuf = current->linbuf;
18fd37a7SXin LI  lin alloc_lines = current->alloc_lines;
18fd37a7SXin LI  lin line = 0;
18fd37a7SXin LI  lin linbuf_base = current->linbuf_base;
18fd37a7SXin LI  lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
18fd37a7SXin LI  struct equivclass *eqs = equivs;
18fd37a7SXin LI  lin eqs_index = equivs_index;
18fd37a7SXin LI  lin eqs_alloc = equivs_alloc;
18fd37a7SXin LI  char const *suffix_begin = current->suffix_begin;
18fd37a7SXin LI  char const *bufend = FILE_BUFFER (current) + current->buffered;
18fd37a7SXin LI  bool diff_length_compare_anyway =
18fd37a7SXin LI    ignore_white_space != IGNORE_NO_WHITE_SPACE;
18fd37a7SXin LI  bool same_length_diff_contents_compare_anyway =
18fd37a7SXin LI    diff_length_compare_anyway | ignore_case;
18fd37a7SXin LI
18fd37a7SXin LI  while (p < suffix_begin)
18fd37a7SXin LI    {
18fd37a7SXin LI      char const *ip = p;
18fd37a7SXin LI
18fd37a7SXin LI      h = 0;
18fd37a7SXin LI
18fd37a7SXin LI      /* Hash this line until we find a newline.  */
18fd37a7SXin LI      if (ignore_case)
18fd37a7SXin LI	switch (ignore_white_space)
18fd37a7SXin LI	  {
18fd37a7SXin LI	  case IGNORE_ALL_SPACE:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      if (! isspace (c))
18fd37a7SXin LI		h = HASH (h, tolower (c));
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  case IGNORE_SPACE_CHANGE:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      {
18fd37a7SXin LI		if (isspace (c))
18fd37a7SXin LI		  {
18fd37a7SXin LI		    do
18fd37a7SXin LI		      if ((c = *p++) == '\n')
18fd37a7SXin LI			goto hashing_done;
18fd37a7SXin LI		    while (isspace (c));
18fd37a7SXin LI
18fd37a7SXin LI		    h = HASH (h, ' ');
18fd37a7SXin LI		  }
18fd37a7SXin LI
18fd37a7SXin LI		/* C is now the first non-space.  */
18fd37a7SXin LI		h = HASH (h, tolower (c));
18fd37a7SXin LI	      }
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  case IGNORE_TAB_EXPANSION:
18fd37a7SXin LI	    {
18fd37a7SXin LI	      size_t column = 0;
18fd37a7SXin LI	      while ((c = *p++) != '\n')
18fd37a7SXin LI		{
18fd37a7SXin LI		  size_t repetitions = 1;
18fd37a7SXin LI
18fd37a7SXin LI		  switch (c)
18fd37a7SXin LI		    {
18fd37a7SXin LI		    case '\b':
18fd37a7SXin LI		      column -= 0 < column;
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    case '\t':
18fd37a7SXin LI		      c = ' ';
18fd37a7SXin LI		      repetitions = tabsize - column % tabsize;
18fd37a7SXin LI		      column = (column + repetitions < column
18fd37a7SXin LI				? 0
18fd37a7SXin LI				: column + repetitions);
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    case '\r':
18fd37a7SXin LI		      column = 0;
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    default:
18fd37a7SXin LI		      c = tolower (c);
18fd37a7SXin LI		      column++;
18fd37a7SXin LI		      break;
18fd37a7SXin LI		    }
18fd37a7SXin LI
18fd37a7SXin LI		  do
18fd37a7SXin LI		    h = HASH (h, c);
18fd37a7SXin LI		  while (--repetitions != 0);
18fd37a7SXin LI		}
18fd37a7SXin LI	    }
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  default:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      h = HASH (h, tolower (c));
18fd37a7SXin LI	    break;
18fd37a7SXin LI	  }
18fd37a7SXin LI      else
18fd37a7SXin LI	switch (ignore_white_space)
18fd37a7SXin LI	  {
18fd37a7SXin LI	  case IGNORE_ALL_SPACE:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      if (! isspace (c))
18fd37a7SXin LI		h = HASH (h, c);
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  case IGNORE_SPACE_CHANGE:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      {
18fd37a7SXin LI		if (isspace (c))
18fd37a7SXin LI		  {
18fd37a7SXin LI		    do
18fd37a7SXin LI		      if ((c = *p++) == '\n')
18fd37a7SXin LI			goto hashing_done;
18fd37a7SXin LI		    while (isspace (c));
18fd37a7SXin LI
18fd37a7SXin LI		    h = HASH (h, ' ');
18fd37a7SXin LI		  }
18fd37a7SXin LI
18fd37a7SXin LI		/* C is now the first non-space.  */
18fd37a7SXin LI		h = HASH (h, c);
18fd37a7SXin LI	      }
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  case IGNORE_TAB_EXPANSION:
18fd37a7SXin LI	    {
18fd37a7SXin LI	      size_t column = 0;
18fd37a7SXin LI	      while ((c = *p++) != '\n')
18fd37a7SXin LI		{
18fd37a7SXin LI		  size_t repetitions = 1;
18fd37a7SXin LI
18fd37a7SXin LI		  switch (c)
18fd37a7SXin LI		    {
18fd37a7SXin LI		    case '\b':
18fd37a7SXin LI		      column -= 0 < column;
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    case '\t':
18fd37a7SXin LI		      c = ' ';
18fd37a7SXin LI		      repetitions = tabsize - column % tabsize;
18fd37a7SXin LI		      column = (column + repetitions < column
18fd37a7SXin LI				? 0
18fd37a7SXin LI				: column + repetitions);
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    case '\r':
18fd37a7SXin LI		      column = 0;
18fd37a7SXin LI		      break;
18fd37a7SXin LI
18fd37a7SXin LI		    default:
18fd37a7SXin LI		      column++;
18fd37a7SXin LI		      break;
18fd37a7SXin LI		    }
18fd37a7SXin LI
18fd37a7SXin LI		  do
18fd37a7SXin LI		    h = HASH (h, c);
18fd37a7SXin LI		  while (--repetitions != 0);
18fd37a7SXin LI		}
18fd37a7SXin LI	    }
18fd37a7SXin LI	    break;
18fd37a7SXin LI
18fd37a7SXin LI	  default:
18fd37a7SXin LI	    while ((c = *p++) != '\n')
18fd37a7SXin LI	      h = HASH (h, c);
18fd37a7SXin LI	    break;
18fd37a7SXin LI	  }
18fd37a7SXin LI
18fd37a7SXin LI   hashing_done:;
18fd37a7SXin LI
18fd37a7SXin LI      bucket = &buckets[h % nbuckets];
18fd37a7SXin LI      length = p - ip - 1;
18fd37a7SXin LI
18fd37a7SXin LI      if (p == bufend
18fd37a7SXin LI	  && current->missing_newline
18fd37a7SXin LI	  && ROBUST_OUTPUT_STYLE (output_style))
18fd37a7SXin LI	{
18fd37a7SXin LI	  /* This line is incomplete.  If this is significant,
18fd37a7SXin LI	     put the line into buckets[-1].  */
18fd37a7SXin LI	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
18fd37a7SXin LI	    bucket = &buckets[-1];
18fd37a7SXin LI
18fd37a7SXin LI	  /* Omit the inserted newline when computing linbuf later.  */
18fd37a7SXin LI	  p--;
18fd37a7SXin LI	  bufend = suffix_begin = p;
18fd37a7SXin LI	}
18fd37a7SXin LI
18fd37a7SXin LI      for (i = *bucket;  ;  i = eqs[i].next)
18fd37a7SXin LI	if (!i)
18fd37a7SXin LI	  {
18fd37a7SXin LI	    /* Create a new equivalence class in this bucket.  */
18fd37a7SXin LI	    i = eqs_index++;
18fd37a7SXin LI	    if (i == eqs_alloc)
18fd37a7SXin LI	      {
18fd37a7SXin LI		if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
18fd37a7SXin LI		  xalloc_die ();
18fd37a7SXin LI		eqs_alloc *= 2;
18fd37a7SXin LI		eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
18fd37a7SXin LI	      }
18fd37a7SXin LI	    eqs[i].next = *bucket;
18fd37a7SXin LI	    eqs[i].hash = h;
18fd37a7SXin LI	    eqs[i].line = ip;
18fd37a7SXin LI	    eqs[i].length = length;
18fd37a7SXin LI	    *bucket = i;
18fd37a7SXin LI	    break;
18fd37a7SXin LI	  }
18fd37a7SXin LI	else if (eqs[i].hash == h)
18fd37a7SXin LI	  {
18fd37a7SXin LI	    char const *eqline = eqs[i].line;
18fd37a7SXin LI
18fd37a7SXin LI	    /* Reuse existing class if lines_differ reports the lines
18fd37a7SXin LI               equal.  */
18fd37a7SXin LI	    if (eqs[i].length == length)
18fd37a7SXin LI	      {
18fd37a7SXin LI		/* Reuse existing equivalence class if the lines are identical.
18fd37a7SXin LI		   This detects the common case of exact identity
18fd37a7SXin LI		   faster than lines_differ would.  */
18fd37a7SXin LI		if (memcmp (eqline, ip, length) == 0)
18fd37a7SXin LI		  break;
18fd37a7SXin LI		if (!same_length_diff_contents_compare_anyway)
18fd37a7SXin LI		  continue;
18fd37a7SXin LI	      }
18fd37a7SXin LI	    else if (!diff_length_compare_anyway)
18fd37a7SXin LI	      continue;
18fd37a7SXin LI
18fd37a7SXin LI	    if (! lines_differ (eqline, ip))
18fd37a7SXin LI	      break;
18fd37a7SXin LI	  }
18fd37a7SXin LI
18fd37a7SXin LI      /* Maybe increase the size of the line table.  */
18fd37a7SXin LI      if (line == alloc_lines)
18fd37a7SXin LI	{
18fd37a7SXin LI	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
18fd37a7SXin LI	  if (PTRDIFF_MAX / 3 <= alloc_lines
18fd37a7SXin LI	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
18fd37a7SXin LI	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
18fd37a7SXin LI	    xalloc_die ();
18fd37a7SXin LI	  alloc_lines = 2 * alloc_lines - linbuf_base;
18fd37a7SXin LI	  cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
18fd37a7SXin LI	  linbuf += linbuf_base;
18fd37a7SXin LI	  linbuf = xrealloc (linbuf,
18fd37a7SXin LI			     (alloc_lines - linbuf_base) * sizeof *linbuf);
18fd37a7SXin LI	  linbuf -= linbuf_base;
18fd37a7SXin LI	}
18fd37a7SXin LI      linbuf[line] = ip;
18fd37a7SXin LI      cureqs[line] = i;
18fd37a7SXin LI      ++line;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  current->buffered_lines = line;
18fd37a7SXin LI
18fd37a7SXin LI  for (i = 0;  ;  i++)
18fd37a7SXin LI    {
18fd37a7SXin LI      /* Record the line start for lines in the suffix that we care about.
18fd37a7SXin LI	 Record one more line start than lines,
18fd37a7SXin LI	 so that we can compute the length of any buffered line.  */
18fd37a7SXin LI      if (line == alloc_lines)
18fd37a7SXin LI	{
18fd37a7SXin LI	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
18fd37a7SXin LI	  if (PTRDIFF_MAX / 3 <= alloc_lines
18fd37a7SXin LI	      || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
18fd37a7SXin LI	      || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
18fd37a7SXin LI	    xalloc_die ();
18fd37a7SXin LI	  alloc_lines = 2 * alloc_lines - linbuf_base;
18fd37a7SXin LI	  linbuf += linbuf_base;
18fd37a7SXin LI	  linbuf = xrealloc (linbuf,
18fd37a7SXin LI			     (alloc_lines - linbuf_base) * sizeof *linbuf);
18fd37a7SXin LI	  linbuf -= linbuf_base;
18fd37a7SXin LI	}
18fd37a7SXin LI      linbuf[line] = p;
18fd37a7SXin LI
18fd37a7SXin LI      if (p == bufend)
18fd37a7SXin LI	break;
18fd37a7SXin LI
18fd37a7SXin LI      if (context <= i && no_diff_means_no_output)
18fd37a7SXin LI	break;
18fd37a7SXin LI
18fd37a7SXin LI      line++;
18fd37a7SXin LI
18fd37a7SXin LI      while (*p++ != '\n')
18fd37a7SXin LI	continue;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* Done with cache in local variables.  */
18fd37a7SXin LI  current->linbuf = linbuf;
18fd37a7SXin LI  current->valid_lines = line;
18fd37a7SXin LI  current->alloc_lines = alloc_lines;
18fd37a7SXin LI  current->equivs = cureqs;
18fd37a7SXin LI  equivs = eqs;
18fd37a7SXin LI  equivs_alloc = eqs_alloc;
18fd37a7SXin LI  equivs_index = eqs_index;
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* Prepare the text.  Make sure the text end is initialized.
18fd37a7SXin LI   Make sure text ends in a newline,
18fd37a7SXin LI   but remember that we had to add one.
18fd37a7SXin LI   Strip trailing CRs, if that was requested.  */
18fd37a7SXin LI
18fd37a7SXin LIstatic void
18fd37a7SXin LIprepare_text (struct file_data *current)
18fd37a7SXin LI{
18fd37a7SXin LI  size_t buffered = current->buffered;
18fd37a7SXin LI  char *p = FILE_BUFFER (current);
18fd37a7SXin LI  char *dst;
18fd37a7SXin LI
18fd37a7SXin LI  if (buffered == 0 || p[buffered - 1] == '\n')
18fd37a7SXin LI    current->missing_newline = false;
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      p[buffered++] = '\n';
18fd37a7SXin LI      current->missing_newline = true;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  if (!p)
18fd37a7SXin LI    return;
18fd37a7SXin LI
18fd37a7SXin LI  /* Don't use uninitialized storage when planting or using sentinels.  */
18fd37a7SXin LI  memset (p + buffered, 0, sizeof (word));
18fd37a7SXin LI
18fd37a7SXin LI  if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
18fd37a7SXin LI    {
18fd37a7SXin LI      char const *src = dst;
18fd37a7SXin LI      char const *srclim = p + buffered;
18fd37a7SXin LI
18fd37a7SXin LI      do
18fd37a7SXin LI	dst += ! ((*dst = *src++) == '\r' && *src == '\n');
18fd37a7SXin LI      while (src < srclim);
18fd37a7SXin LI
18fd37a7SXin LI      buffered -= src - dst;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  current->buffered = buffered;
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* We have found N lines in a buffer of size S; guess the
18fd37a7SXin LI   proportionate number of lines that will be found in a buffer of
18fd37a7SXin LI   size T.  However, do not guess a number of lines so large that the
18fd37a7SXin LI   resulting line table might cause overflow in size calculations.  */
18fd37a7SXin LIstatic lin
18fd37a7SXin LIguess_lines (lin n, size_t s, size_t t)
18fd37a7SXin LI{
18fd37a7SXin LI  size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
18fd37a7SXin LI  lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
18fd37a7SXin LI  return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
18fd37a7SXin LI}
18fd37a7SXin LI
18fd37a7SXin LI/* Given a vector of two file_data objects, find the identical
18fd37a7SXin LI   prefixes and suffixes of each object.  */
18fd37a7SXin LI
18fd37a7SXin LIstatic void
18fd37a7SXin LIfind_identical_ends (struct file_data filevec[])
18fd37a7SXin LI{
18fd37a7SXin LI  word *w0, *w1;
18fd37a7SXin LI  char *p0, *p1, *buffer0, *buffer1;
18fd37a7SXin LI  char const *end0, *beg0;
18fd37a7SXin LI  char const **linbuf0, **linbuf1;
18fd37a7SXin LI  lin i, lines;
18fd37a7SXin LI  size_t n0, n1;
18fd37a7SXin LI  lin alloc_lines0, alloc_lines1;
18fd37a7SXin LI  lin buffered_prefix, prefix_count, prefix_mask;
18fd37a7SXin LI  lin middle_guess, suffix_guess;
18fd37a7SXin LI
18fd37a7SXin LI  slurp (&filevec[0]);
18fd37a7SXin LI  prepare_text (&filevec[0]);
18fd37a7SXin LI  if (filevec[0].desc != filevec[1].desc)
18fd37a7SXin LI    {
18fd37a7SXin LI      slurp (&filevec[1]);
18fd37a7SXin LI      prepare_text (&filevec[1]);
18fd37a7SXin LI    }
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      filevec[1].buffer = filevec[0].buffer;
18fd37a7SXin LI      filevec[1].bufsize = filevec[0].bufsize;
18fd37a7SXin LI      filevec[1].buffered = filevec[0].buffered;
18fd37a7SXin LI      filevec[1].missing_newline = filevec[0].missing_newline;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* Find identical prefix.  */
18fd37a7SXin LI
18fd37a7SXin LI  w0 = filevec[0].buffer;
18fd37a7SXin LI  w1 = filevec[1].buffer;
18fd37a7SXin LI  p0 = buffer0 = (char *) w0;
18fd37a7SXin LI  p1 = buffer1 = (char *) w1;
18fd37a7SXin LI  n0 = filevec[0].buffered;
18fd37a7SXin LI  n1 = filevec[1].buffered;
18fd37a7SXin LI
18fd37a7SXin LI  if (p0 == p1)
18fd37a7SXin LI    /* The buffers are the same; sentinels won't work.  */
18fd37a7SXin LI    p0 = p1 += n1;
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      /* Insert end sentinels, in this case characters that are guaranteed
18fd37a7SXin LI	 to make the equality test false, and thus terminate the loop.  */
18fd37a7SXin LI
18fd37a7SXin LI      if (n0 < n1)
18fd37a7SXin LI	p0[n0] = ~p1[n0];
18fd37a7SXin LI      else
18fd37a7SXin LI	p1[n1] = ~p0[n1];
18fd37a7SXin LI
18fd37a7SXin LI      /* Loop until first mismatch, or to the sentinel characters.  */
18fd37a7SXin LI
18fd37a7SXin LI      /* Compare a word at a time for speed.  */
18fd37a7SXin LI      while (*w0 == *w1)
18fd37a7SXin LI	w0++, w1++;
18fd37a7SXin LI
18fd37a7SXin LI      /* Do the last few bytes of comparison a byte at a time.  */
18fd37a7SXin LI      p0 = (char *) w0;
18fd37a7SXin LI      p1 = (char *) w1;
18fd37a7SXin LI      while (*p0 == *p1)
18fd37a7SXin LI	p0++, p1++;
18fd37a7SXin LI
18fd37a7SXin LI      /* Don't mistakenly count missing newline as part of prefix.  */
18fd37a7SXin LI      if (ROBUST_OUTPUT_STYLE (output_style)
18fd37a7SXin LI	  && ((buffer0 + n0 - filevec[0].missing_newline < p0)
18fd37a7SXin LI	      !=
18fd37a7SXin LI	      (buffer1 + n1 - filevec[1].missing_newline < p1)))
18fd37a7SXin LI	p0--, p1--;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* Now P0 and P1 point at the first nonmatching characters.  */
18fd37a7SXin LI
18fd37a7SXin LI  /* Skip back to last line-beginning in the prefix,
18fd37a7SXin LI     and then discard up to HORIZON_LINES lines from the prefix.  */
18fd37a7SXin LI  i = horizon_lines;
18fd37a7SXin LI  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
18fd37a7SXin LI    p0--, p1--;
18fd37a7SXin LI
18fd37a7SXin LI  /* Record the prefix.  */
18fd37a7SXin LI  filevec[0].prefix_end = p0;
18fd37a7SXin LI  filevec[1].prefix_end = p1;
18fd37a7SXin LI
18fd37a7SXin LI  /* Find identical suffix.  */
18fd37a7SXin LI
18fd37a7SXin LI  /* P0 and P1 point beyond the last chars not yet compared.  */
18fd37a7SXin LI  p0 = buffer0 + n0;
18fd37a7SXin LI  p1 = buffer1 + n1;
18fd37a7SXin LI
18fd37a7SXin LI  if (! ROBUST_OUTPUT_STYLE (output_style)
18fd37a7SXin LI      || filevec[0].missing_newline == filevec[1].missing_newline)
18fd37a7SXin LI    {
18fd37a7SXin LI      end0 = p0;	/* Addr of last char in file 0.  */
18fd37a7SXin LI
18fd37a7SXin LI      /* Get value of P0 at which we should stop scanning backward:
18fd37a7SXin LI	 this is when either P0 or P1 points just past the last char
18fd37a7SXin LI	 of the identical prefix.  */
18fd37a7SXin LI      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
18fd37a7SXin LI
18fd37a7SXin LI      /* Scan back until chars don't match or we reach that point.  */
18fd37a7SXin LI      for (; p0 != beg0; p0--, p1--)
18fd37a7SXin LI	if (*p0 != *p1)
18fd37a7SXin LI	  {
18fd37a7SXin LI	    /* Point at the first char of the matching suffix.  */
18fd37a7SXin LI	    beg0 = p0;
18fd37a7SXin LI	    break;
18fd37a7SXin LI	  }
18fd37a7SXin LI
18fd37a7SXin LI      /* Are we at a line-beginning in both files?  If not, add the rest of
18fd37a7SXin LI	 this line to the main body.  Discard up to HORIZON_LINES lines from
18fd37a7SXin LI	 the identical suffix.  Also, discard one extra line,
18fd37a7SXin LI	 because shift_boundaries may need it.  */
18fd37a7SXin LI      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
18fd37a7SXin LI			    &&
18fd37a7SXin LI			    (buffer1 == p1 || p1[-1] == '\n'));
18fd37a7SXin LI      while (i-- && p0 != end0)
18fd37a7SXin LI	while (*p0++ != '\n')
18fd37a7SXin LI	  continue;
18fd37a7SXin LI
18fd37a7SXin LI      p1 += p0 - beg0;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* Record the suffix.  */
18fd37a7SXin LI  filevec[0].suffix_begin = p0;
18fd37a7SXin LI  filevec[1].suffix_begin = p1;
18fd37a7SXin LI
18fd37a7SXin LI  /* Calculate number of lines of prefix to save.
18fd37a7SXin LI
18fd37a7SXin LI     prefix_count == 0 means save the whole prefix;
18fd37a7SXin LI     we need this for options like -D that output the whole file,
18fd37a7SXin LI     or for enormous contexts (to avoid worrying about arithmetic overflow).
18fd37a7SXin LI     We also need it for options like -F that output some preceding line;
18fd37a7SXin LI     at least we will need to find the last few lines,
18fd37a7SXin LI     but since we don't know how many, it's easiest to find them all.
18fd37a7SXin LI
18fd37a7SXin LI     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
18fd37a7SXin LI     of the line buffer; they'll be moved to the proper location later.
18fd37a7SXin LI     Handle 1 more line than the context says (because we count 1 too many),
18fd37a7SXin LI     rounded up to the next power of 2 to speed index computation.  */
18fd37a7SXin LI
18fd37a7SXin LI  if (no_diff_means_no_output && ! function_regexp.fastmap
18fd37a7SXin LI      && context < LIN_MAX / 4 && context < n0)
18fd37a7SXin LI    {
18fd37a7SXin LI      middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
18fd37a7SXin LI      suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
18fd37a7SXin LI      for (prefix_count = 1;  prefix_count <= context;  prefix_count *= 2)
18fd37a7SXin LI	continue;
18fd37a7SXin LI      alloc_lines0 = (prefix_count + middle_guess
18fd37a7SXin LI		      + MIN (context, suffix_guess));
18fd37a7SXin LI    }
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      prefix_count = 0;
18fd37a7SXin LI      alloc_lines0 = guess_lines (0, 0, n0);
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  prefix_mask = prefix_count - 1;
18fd37a7SXin LI  lines = 0;
18fd37a7SXin LI  linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
18fd37a7SXin LI  p0 = buffer0;
18fd37a7SXin LI
18fd37a7SXin LI  /* If the prefix is needed, find the prefix lines.  */
18fd37a7SXin LI  if (! (no_diff_means_no_output
18fd37a7SXin LI	 && filevec[0].prefix_end == p0
18fd37a7SXin LI	 && filevec[1].prefix_end == p1))
18fd37a7SXin LI    {
18fd37a7SXin LI      end0 = filevec[0].prefix_end;
18fd37a7SXin LI      while (p0 != end0)
18fd37a7SXin LI	{
18fd37a7SXin LI	  lin l = lines++ & prefix_mask;
18fd37a7SXin LI	  if (l == alloc_lines0)
18fd37a7SXin LI	    {
18fd37a7SXin LI	      if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
18fd37a7SXin LI		xalloc_die ();
18fd37a7SXin LI	      alloc_lines0 *= 2;
18fd37a7SXin LI	      linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
18fd37a7SXin LI	    }
18fd37a7SXin LI	  linbuf0[l] = p0;
18fd37a7SXin LI	  while (*p0++ != '\n')
18fd37a7SXin LI	    continue;
18fd37a7SXin LI	}
18fd37a7SXin LI    }
18fd37a7SXin LI  buffered_prefix = prefix_count && context < lines ? context : lines;
18fd37a7SXin LI
18fd37a7SXin LI  /* Allocate line buffer 1.  */
18fd37a7SXin LI
18fd37a7SXin LI  middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
18fd37a7SXin LI  suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
18fd37a7SXin LI  alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
18fd37a7SXin LI  if (alloc_lines1 < buffered_prefix
18fd37a7SXin LI      || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
18fd37a7SXin LI    xalloc_die ();
18fd37a7SXin LI  linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
18fd37a7SXin LI
18fd37a7SXin LI  if (buffered_prefix != lines)
18fd37a7SXin LI    {
18fd37a7SXin LI      /* Rotate prefix lines to proper location.  */
18fd37a7SXin LI      for (i = 0;  i < buffered_prefix;  i++)
18fd37a7SXin LI	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
18fd37a7SXin LI      for (i = 0;  i < buffered_prefix;  i++)
18fd37a7SXin LI	linbuf0[i] = linbuf1[i];
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  /* Initialize line buffer 1 from line buffer 0.  */
18fd37a7SXin LI  for (i = 0; i < buffered_prefix; i++)
18fd37a7SXin LI    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
18fd37a7SXin LI
18fd37a7SXin LI  /* Record the line buffer, adjusted so that
18fd37a7SXin LI     linbuf[0] points at the first differing line.  */
18fd37a7SXin LI  filevec[0].linbuf = linbuf0 + buffered_prefix;
18fd37a7SXin LI  filevec[1].linbuf = linbuf1 + buffered_prefix;
18fd37a7SXin LI  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
18fd37a7SXin LI  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
18fd37a7SXin LI  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
18fd37a7SXin LI  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
18fd37a7SXin LI}
18fd37a7SXin LI
/* prime_offset[k] is the distance from 2**k down to the largest prime
   below 2**k; that is, for 1 < k, (2**k - prime_offset[k]) is that
   prime.  Entries 0 and 1 are zero and fall outside this guarantee.
   The values come from Chris K. Caldwell's list
   <http://www.utm.edu/research/primes/lists/2small/>.  */

static const unsigned char prime_offset[] =
{
  /* k =  0 ..  7 */    0,   0,   1,   1,   3,   1,   3,   1,
  /* k =  8 .. 15 */    5,   3,   3,   9,   3,   1,   3,  19,
  /* k = 16 .. 23 */   15,   1,   5,   1,   3,   9,   3,  15,
  /* k = 24 .. 31 */    3,  39,   5,  39,  57,   3,  35,   1,
  /* k = 32 .. 39 */    5,   9,  41,  31,   5,  25,  45,   7,
  /* k = 40 .. 47 */   87,  21,  11,  57,  17,  55,  21, 115,
  /* k = 48 .. 55 */   59,  81,  27, 129,  47, 111,  33,  55,
  /* k = 56 .. 63 */    5,  13,  27,  55,  93,   1,  57,  25
};
18fd37a7SXin LI
18fd37a7SXin LI/* Verify that this host's size_t is not too wide for the above table.  */
18fd37a7SXin LI
18fd37a7SXin LIverify (enough_prime_offsets,
18fd37a7SXin LI	sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
18fd37a7SXin LI
18fd37a7SXin LI/* Given a vector of two file_data objects, read the file associated
18fd37a7SXin LI   with each one, and build the table of equivalence classes.
18fd37a7SXin LI   Return nonzero if either file appears to be a binary file.
18fd37a7SXin LI   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
18fd37a7SXin LI
18fd37a7SXin LIbool
18fd37a7SXin LIread_files (struct file_data filevec[], bool pretend_binary)
18fd37a7SXin LI{
18fd37a7SXin LI  int i;
18fd37a7SXin LI  bool skip_test = text | pretend_binary;
18fd37a7SXin LI  bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
18fd37a7SXin LI
18fd37a7SXin LI  if (filevec[0].desc != filevec[1].desc)
18fd37a7SXin LI    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
18fd37a7SXin LI  else
18fd37a7SXin LI    {
18fd37a7SXin LI      filevec[1].buffer = filevec[0].buffer;
18fd37a7SXin LI      filevec[1].bufsize = filevec[0].bufsize;
18fd37a7SXin LI      filevec[1].buffered = filevec[0].buffered;
18fd37a7SXin LI    }
18fd37a7SXin LI  if (appears_binary)
18fd37a7SXin LI    {
18fd37a7SXin LI      set_binary_mode (filevec[0].desc, true);
18fd37a7SXin LI      set_binary_mode (filevec[1].desc, true);
18fd37a7SXin LI      return true;
18fd37a7SXin LI    }
18fd37a7SXin LI
18fd37a7SXin LI  find_identical_ends (filevec);
18fd37a7SXin LI
18fd37a7SXin LI  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
18fd37a7SXin LI  if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
18fd37a7SXin LI    xalloc_die ();
18fd37a7SXin LI  equivs = xmalloc (equivs_alloc * sizeof *equivs);
18fd37a7SXin LI  /* Equivalence class 0 is permanently safe for lines that were not
18fd37a7SXin LI     hashed.  Real equivalence classes start at 1.  */
18fd37a7SXin LI  equivs_index = 1;
18fd37a7SXin LI
18fd37a7SXin LI  /* Allocate (one plus) a prime number of hash buckets.  Use a prime
18fd37a7SXin LI     number between 1/3 and 2/3 of the value of equiv_allocs,
18fd37a7SXin LI     approximately.  */
18fd37a7SXin LI  for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
18fd37a7SXin LI    continue;
18fd37a7SXin LI  nbuckets = ((size_t) 1 << i) - prime_offset[i];
18fd37a7SXin LI  if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
18fd37a7SXin LI    xalloc_die ();
18fd37a7SXin LI  buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
18fd37a7SXin LI  buckets++;
18fd37a7SXin LI
18fd37a7SXin LI  for (i = 0; i < 2; i++)
18fd37a7SXin LI    find_and_hash_each_line (&filevec[i]);
18fd37a7SXin LI
18fd37a7SXin LI  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
18fd37a7SXin LI
18fd37a7SXin LI  free (equivs);
18fd37a7SXin LI  free (buckets - 1);
18fd37a7SXin LI
18fd37a7SXin LI  return false;
18fd37a7SXin LI}