xref: /freebsd/contrib/diff/src/util.c (revision 6683132d54bd6d589889e43dabdc53d35e38a028)
1 /* Support routines for GNU DIFF.
2 
3    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4    2004 Free Software Foundation, Inc.
5 
6    This file is part of GNU DIFF.
7 
8    GNU DIFF is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    GNU DIFF is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; see the file COPYING.
20    If not, write to the Free Software Foundation,
21    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 #include "diff.h"
24 #include <dirname.h>
25 #include <error.h>
26 #include <quotesys.h>
27 #include <xalloc.h>
28 
/* Path of the subsidiary `pr' program, taken from the PR_PROGRAM macro.
   begin_output runs it to paginate output, and finish_output names it
   in its diagnostics.  */
char const pr_program[] = PR_PROGRAM;
30 
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.

   A record is allocated with room for its strings directly after the
   header: message5 sizes it as offsetof (struct msg, args) plus the
   length of every string, so ARGS is really a variable-length tail.  */

struct msg
{
  struct msg *next;	/* Next queued message, or null at the tail.  */
  char args[1]; /* Format + 4 args, each '\0' terminated, concatenated.  */
};
39 
/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link that
   the next queued message is stored through.  While the chain is empty
   this is the address of the head itself.  */

static struct msg **msg_chain_end = &msg_chain;
47 
/* Report a failed system call without exiting.
   NAME, normally the name of the file involved, is handed to `error'
   together with the current errno and a zero exit status.  */

void
perror_with_name (char const *name)
{
  int const saved_errno = errno;

  error (0, saved_errno, "%s", name);
}
56 
57 /* Use when a system call returns non-zero status and that is fatal.  */
58 
59 void
60 pfatal_with_name (char const *name)
61 {
62   int e = errno;
63   print_message_queue ();
64   error (EXIT_TROUBLE, e, "%s", name);
65   abort ();
66 }
67 
68 /* Print an error message containing MSGID, then exit.  */
69 
70 void
71 fatal (char const *msgid)
72 {
73   print_message_queue ();
74   error (EXIT_TROUBLE, 0, "%s", _(msgid));
75   abort ();
76 }
77 
/* Print a message built from FORMAT_MSGID and the two string arguments
   ARG1 and ARG2, or queue it for later if -l is in effect.
   This is used for things like "Only in ...".
   It is message5 with the last two arguments left null.  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const no_arg = 0;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
86 
87 void
88 message5 (char const *format_msgid, char const *arg1, char const *arg2,
89 	  char const *arg3, char const *arg4)
90 {
91   if (paginate)
92     {
93       char *p;
94       char const *arg[5];
95       int i;
96       size_t size[5];
97       size_t total_size = offsetof (struct msg, args);
98       struct msg *new;
99 
100       arg[0] = format_msgid;
101       arg[1] = arg1;
102       arg[2] = arg2;
103       arg[3] = arg3 ? arg3 : "";
104       arg[4] = arg4 ? arg4 : "";
105 
106       for (i = 0;  i < 5;  i++)
107 	total_size += size[i] = strlen (arg[i]) + 1;
108 
109       new = xmalloc (total_size);
110 
111       for (i = 0, p = new->args;  i < 5;  p += size[i++])
112 	memcpy (p, arg[i], size[i]);
113 
114       *msg_chain_end = new;
115       new->next = 0;
116       msg_chain_end = &new->next;
117     }
118   else
119     {
120       if (sdiff_merge_assist)
121 	putchar (' ');
122       printf (_(format_msgid), arg1, arg2, arg3, arg4);
123     }
124 }
125 
126 /* Output all the messages that were saved up by calls to `message'.  */
127 
128 void
129 print_message_queue (void)
130 {
131   char const *arg[5];
132   int i;
133   struct msg *m = msg_chain;
134 
135   while (m)
136     {
137       struct msg *next = m->next;
138       arg[0] = m->args;
139       for (i = 0;  i < 4;  i++)
140 	arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
141       printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
142       free (m);
143       m = next;
144     }
145 }
146 
147 /* Call before outputting the results of comparing files NAME0 and NAME1
148    to set up OUTFILE, the stdio stream for the output to go to.
149 
150    Usually, OUTFILE is just stdout.  But when -l was specified
151    we fork off a `pr' and make OUTFILE a pipe to it.
152    `pr' then outputs to our stdout.  */
153 
154 static char const *current_name0;
155 static char const *current_name1;
156 static bool currently_recursive;
157 
158 void
159 setup_output (char const *name0, char const *name1, bool recursive)
160 {
161   current_name0 = name0;
162   current_name1 = name1;
163   currently_recursive = recursive;
164   outfile = 0;
165 }
166 
#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
/* Process ID of the subsidiary `pr' started by begin_output;
   finish_output waits for it.  */
static pid_t pr_pid;
#endif
170 
/* Start the output for the file pair recorded by setup_output, unless
   it has already been started (OUTFILE is nonzero).

   Build the "diff OPTIONS NAME0 NAME1" header line.  When paginating,
   start a subsidiary `pr' with that line as its -h header and make
   OUTFILE a stream feeding it; otherwise make OUTFILE stdout and, for
   recursive comparisons, print the header line.  Finally print the
   context or unified header if the output style calls for one.

   Failures to flush, create the pipe, fork, or open the stream are
   fatal (see pfatal_with_name).  */

void
begin_output (void)
{
  char *name;

  /* Output for this pair of files has already been started.  */
  if (outfile != 0)
    return;

  /* Construct the header of this piece of diff.
     The 7 extra bytes are for "diff", the two separating spaces,
     and the terminating '\0'.  */
  name = xmalloc (strlen (current_name0) + strlen (current_name1)
		  + strlen (switch_string) + 7);

  /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
     the standard: it says that we must print only the last component
     of the pathnames, and it requires two spaces after "diff" if
     there are no options.  These requirements are silly and do not
     match historical practice.  */
  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);

  if (paginate)
    {
      /* Flush our own pending stdout data before `pr' starts writing.  */
      if (fflush (stdout) != 0)
	pfatal_with_name (_("write failed"));

      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
      {
#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
	int pipes[2];

	if (pipe (pipes) != 0)
	  pfatal_with_name ("pipe");

	pr_pid = vfork ();
	if (pr_pid < 0)
	  pfatal_with_name ("fork");

	if (pr_pid == 0)
	  {
	    /* Child: make the read end of the pipe its standard input,
	       then become `pr'.
	       NOTE(review): pfatal_with_name is called from the vfork
	       child on dup2 failure -- confirm this is acceptable.  */
	    close (pipes[1]);
	    if (pipes[0] != STDIN_FILENO)
	      {
		if (dup2 (pipes[0], STDIN_FILENO) < 0)
		  pfatal_with_name ("dup2");
		close (pipes[0]);
	      }

	    execl (pr_program, pr_program, "-h", name, (char *) 0);

	    /* Reached only if execl returned.  finish_output decodes
	       these two exit statuses into specific diagnostics.  */
	    _exit (errno == ENOENT ? 127 : 126);
	  }
	else
	  {
	    /* Parent: keep only the write end, wrapped in a stdio stream.  */
	    close (pipes[0]);
	    outfile = fdopen (pipes[1], "w");
	    if (!outfile)
	      pfatal_with_name ("fdopen");
	  }
#else
	/* No usable fork: run `pr' through popen instead.  The command is
	   the program name, the 7 bytes of " -f -h ", the quoted header,
	   and a terminating '\0'.  quote_system_arg is first called with
	   a null destination to obtain the quoted length.  */
	char *command = xmalloc (sizeof pr_program - 1 + 7
				 + quote_system_arg ((char *) 0, name) + 1);
	char *p;
	sprintf (command, "%s -f -h ", pr_program);
	p = command + sizeof pr_program - 1 + 7;
	p += quote_system_arg (p, name);
	*p = 0;
	errno = 0;
	outfile = popen (command, "w");
	if (!outfile)
	  pfatal_with_name (command);
	free (command);
#endif
      }
    }
  else
    {

      /* If -l was not specified, output the diff straight to `stdout'.  */

      outfile = stdout;

      /* If handling multiple files (because scanning a directory),
	 print which files the following output is about.  */
      if (currently_recursive)
	printf ("%s\n", name);
    }

  free (name);

  /* A special header is needed at the beginning of context output.  */
  switch (output_style)
    {
    case OUTPUT_CONTEXT:
      print_context_header (files, false);
      break;

    case OUTPUT_UNIFIED:
      print_context_header (files, true);
      break;

    default:
      break;
    }
}
273 
274 /* Call after the end of output of diffs for one file.
275    Close OUTFILE and get rid of the `pr' subfork.  */
276 
277 void
278 finish_output (void)
279 {
280   if (outfile != 0 && outfile != stdout)
281     {
282       int status;
283       int wstatus;
284       int werrno = 0;
285       if (ferror (outfile))
286 	fatal ("write failed");
287 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
288       wstatus = pclose (outfile);
289       if (wstatus == -1)
290 	werrno = errno;
291 #else
292       if (fclose (outfile) != 0)
293 	pfatal_with_name (_("write failed"));
294       if (waitpid (pr_pid, &wstatus, 0) < 0)
295 	pfatal_with_name ("waitpid");
296 #endif
297       status = (! werrno && WIFEXITED (wstatus)
298 		? WEXITSTATUS (wstatus)
299 		: INT_MAX);
300       if (status)
301 	error (EXIT_TROUBLE, werrno,
302 	       _(status == 126
303 		 ? "subsidiary program `%s' could not be invoked"
304 		 : status == 127
305 		 ? "subsidiary program `%s' not found"
306 		 : status == INT_MAX
307 		 ? "subsidiary program `%s' failed"
308 		 : "subsidiary program `%s' failed (exit status %d)"),
309 	       pr_program, status);
310     }
311 
312   outfile = 0;
313 }
314 
/* Compare two lines (typically one from each input file)
   according to the command line options.
   For efficiency, this is invoked only when the lines do not match exactly
   but an option like -i might cause us to ignore the difference.

   S1 and S2 point to the first characters of the two lines.  The scan
   ends successfully when both lines reach a newline together.
   The ignore_white_space and ignore_case settings decide which
   differences are overlooked.

   Return nonzero if the lines differ.  */

bool
lines_differ (char const *s1, char const *s2)
{
  register char const *t1 = s1;
  register char const *t2 = s2;
  /* Column reached so far in line 1, with tabs advancing to the next
     multiple of tabsize.  Consulted only for IGNORE_TAB_EXPANSION.  */
  size_t column = 0;

  while (1)
    {
      register unsigned char c1 = *t1++;
      register unsigned char c2 = *t2++;

      /* Test for exact char equality first, since it's a common case.  */
      if (c1 != c2)
	{
	  switch (ignore_white_space)
	    {
	    case IGNORE_ALL_SPACE:
	      /* For -w, just skip past any white space.  */
	      while (isspace (c1) && c1 != '\n') c1 = *t1++;
	      while (isspace (c2) && c2 != '\n') c2 = *t2++;
	      break;

	    case IGNORE_SPACE_CHANGE:
	      /* For -b, advance past any sequence of white space in
		 line 1 and consider it just one space, or nothing at
		 all if it is at the end of the line.  */
	      if (isspace (c1))
		{
		  while (c1 != '\n')
		    {
		      c1 = *t1++;
		      if (! isspace (c1))
			{
			  /* Un-read the non-space character and stand in
			     a single space for the whole run.  */
			  --t1;
			  c1 = ' ';
			  break;
			}
		    }
		}

	      /* Likewise for line 2.  */
	      if (isspace (c2))
		{
		  while (c2 != '\n')
		    {
		      c2 = *t2++;
		      if (! isspace (c2))
			{
			  --t2;
			  c2 = ' ';
			  break;
			}
		    }
		}

	      if (c1 != c2)
		{
		  /* If we went too far when doing the simple test
		     for equality, go back to the first non-white-space
		     character in both sides and try again.  */
		  if (c2 == ' ' && c1 != '\n'
		      && s1 + 1 < t1
		      && isspace ((unsigned char) t1[-2]))
		    {
		      --t1;
		      continue;
		    }
		  if (c1 == ' ' && c2 != '\n'
		      && s2 + 1 < t2
		      && isspace ((unsigned char) t2[-2]))
		    {
		      --t2;
		      continue;
		    }
		}

	      break;

	    case IGNORE_TAB_EXPANSION:
	      /* A space on one side against a tab on the other: advance
		 each side past its run of spaces and tabs, tracking the
		 column each run ends at.  The runs are equivalent only
		 if both end at the same column.  */
	      if ((c1 == ' ' && c2 == '\t')
		  || (c1 == '\t' && c2 == ' '))
		{
		  size_t column2 = column;
		  for (;; c1 = *t1++)
		    {
		      if (c1 == ' ')
			column++;
		      else if (c1 == '\t')
			column += tabsize - column % tabsize;
		      else
			break;
		    }
		  for (;; c2 = *t2++)
		    {
		      if (c2 == ' ')
			column2++;
		      else if (c2 == '\t')
			column2 += tabsize - column2 % tabsize;
		      else
			break;
		    }
		  if (column != column2)
		    return true;
		}
	      break;

	    case IGNORE_NO_WHITE_SPACE:
	      break;
	    }

	  /* Lowercase all letters if -i is specified.  */

	  if (ignore_case)
	    {
	      c1 = tolower (c1);
	      c2 = tolower (c2);
	    }

	  /* Still different after all permitted adjustments: leave the
	     loop and report a difference.  */
	  if (c1 != c2)
	    break;
	}
      /* Here c1 equals c2, so a newline means both lines ended together.  */
      if (c1 == '\n')
	return false;

      column += c1 == '\t' ? tabsize - column % tabsize : 1;
    }

  return true;
}
451 
/* Hunk-finding function for use with print_script.
   Given the edit script START, return the last link of the group of
   changes that begins it.  This implementation does no grouping:
   every change is a group of its own, so START itself is returned.  */

struct change *
find_change (struct change *start)
{
  struct change *last_in_group = start;

  return last_in_group;
}
460 
/* Counterpart of find_change with the same signature.
   It likewise does no grouping and returns START unchanged.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *last_in_group = start;

  return last_in_group;
}
466 
467 /* Divide SCRIPT into pieces by calling HUNKFUN and
468    print each piece with PRINTFUN.
469    Both functions take one arg, an edit script.
470 
471    HUNKFUN is called with the tail of the script
472    and returns the last link that belongs together with the start
473    of the tail.
474 
475    PRINTFUN takes a subscript which belongs together (with a null
476    link at the end) and prints it.  */
477 
478 void
479 print_script (struct change *script,
480 	      struct change * (*hunkfun) (struct change *),
481 	      void (*printfun) (struct change *))
482 {
483   struct change *next = script;
484 
485   while (next)
486     {
487       struct change *this, *end;
488 
489       /* Find a set of changes that belong together.  */
490       this = next;
491       end = (*hunkfun) (next);
492 
493       /* Disconnect them from the rest of the changes,
494 	 making them a hunk, and remember the rest for next iteration.  */
495       next = end->link;
496       end->link = 0;
497 #ifdef DEBUG
498       debug_script (this);
499 #endif
500 
501       /* Print this hunk.  */
502       (*printfun) (this);
503 
504       /* Reconnect the script so it will all be freed properly.  */
505       end->link = next;
506     }
507 }
508 
509 /* Print the text of a single line LINE,
510    flagging it with the characters in LINE_FLAG (which say whether
511    the line is inserted, deleted, changed, etc.).  */
512 
513 void
514 print_1_line (char const *line_flag, char const *const *line)
515 {
516   char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
517   FILE *out = outfile; /* Help the compiler some more.  */
518   char const *flag_format = 0;
519 
520   /* If -T was specified, use a Tab between the line-flag and the text.
521      Otherwise use a Space (as Unix diff does).
522      Print neither space nor tab if line-flags are empty.  */
523 
524   if (line_flag && *line_flag)
525     {
526       flag_format = initial_tab ? "%s\t" : "%s ";
527       fprintf (out, flag_format, line_flag);
528     }
529 
530   output_1_line (base, limit, flag_format, line_flag);
531 
532   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
533     fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
534 }
535 
536 /* Output a line from BASE up to LIMIT.
537    With -t, expand white space characters to spaces, and if FLAG_FORMAT
538    is nonzero, output it with argument LINE_FLAG after every
539    internal carriage return, so that tab stops continue to line up.  */
540 
541 void
542 output_1_line (char const *base, char const *limit, char const *flag_format,
543 	       char const *line_flag)
544 {
545   if (!expand_tabs)
546     fwrite (base, sizeof (char), limit - base, outfile);
547   else
548     {
549       register FILE *out = outfile;
550       register unsigned char c;
551       register char const *t = base;
552       register size_t column = 0;
553       size_t tab_size = tabsize;
554 
555       while (t < limit)
556 	switch ((c = *t++))
557 	  {
558 	  case '\t':
559 	    {
560 	      size_t spaces = tab_size - column % tab_size;
561 	      column += spaces;
562 	      do
563 		putc (' ', out);
564 	      while (--spaces);
565 	    }
566 	    break;
567 
568 	  case '\r':
569 	    putc (c, out);
570 	    if (flag_format && t < limit && *t != '\n')
571 	      fprintf (out, flag_format, line_flag);
572 	    column = 0;
573 	    break;
574 
575 	  case '\b':
576 	    if (column == 0)
577 	      continue;
578 	    column--;
579 	    putc (c, out);
580 	    break;
581 
582 	  default:
583 	    column += isprint (c) != 0;
584 	    putc (c, out);
585 	    break;
586 	  }
587     }
588 }
589 
/* Table of change letters: none at index 0, then 'd', 'a' and 'c'.
   NOTE(review): presumably indexed by the `enum changes' value that
   analyze_hunk returns (UNCHANGED, OLD, NEW, CHANGED) -- confirm
   against the enum's definition.  */
char const change_letter[] = { 0, 'd', 'a', 'c' };
591 
592 /* Translate an internal line number (an index into diff's table of lines)
593    into an actual line number in the input file.
594    The internal line number is I.  FILE points to the data on the file.
595 
596    Internal line numbers count from 0 starting after the prefix.
597    Actual line numbers count from 1 within the entire file.  */
598 
599 lin
600 translate_line_number (struct file_data const *file, lin i)
601 {
602   return i + file->prefix_lines + 1;
603 }
604 
605 /* Translate a line number range.  This is always done for printing,
606    so for convenience translate to long int rather than lin, so that the
607    caller can use printf with "%ld" without casting.  */
608 
609 void
610 translate_range (struct file_data const *file,
611 		 lin a, lin b,
612 		 long int *aptr, long int *bptr)
613 {
614   *aptr = translate_line_number (file, a - 1) + 1;
615   *bptr = translate_line_number (file, b + 1) - 1;
616 }
617 
618 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
619    If the two numbers are identical, print just one number.
620 
621    Args A and B are internal line numbers.
622    We print the translated (real) line numbers.  */
623 
624 void
625 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
626 {
627   long int trans_a, trans_b;
628   translate_range (file, a, b, &trans_a, &trans_b);
629 
630   /* Note: we can have B < A in the case of a range of no lines.
631      In this case, we should print the line number before the range,
632      which is B.  */
633   if (trans_b > trans_a)
634     fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
635   else
636     fprintf (outfile, "%ld", trans_b);
637 }
638 
/* Look at a hunk of edit script and report the range of lines in each file
   that it applies to.  HUNK is the start of the hunk, which is a chain
   of `struct change'.  The first and last line numbers of file 0 are stored in
   *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
   Note that these are internal line numbers that count from 0.

   If no lines from file 0 are deleted, then FIRST0 is LAST0+1.

   A line is ignorable if, after optionally skipping leading white
   space, it has the length that -B treats as blank, or if it matches
   the ignore regexp.  Lines are examined only while one of those two
   options is in effect and no non-ignorable line has been found yet.

   Return UNCHANGED if only ignorable lines are inserted or deleted,
   OLD if lines of file 0 are deleted,
   NEW if lines of file 1 are inserted,
   and CHANGED if both kinds of changes are found. */

enum changes
analyze_hunk (struct change *hunk,
	      lin *first0, lin *last0,
	      lin *first1, lin *last1)
{
  struct change *next;
  lin l0, l1;
  lin show_from, show_to;
  lin i;
  /* True while every line seen so far is ignorable.  It starts false,
     so no line is ever examined, if neither option is in effect.  */
  bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
  size_t trivial_length = ignore_blank_lines - 1;
    /* If 0, ignore zero-length lines;
       if SIZE_MAX, do not ignore lines just because of their length.  */
  bool skip_leading_white_space =
    (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);

  char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
  char const * const *linbuf1 = files[1].linbuf;

  /* Running totals of deleted and inserted lines over the whole hunk.  */
  show_from = show_to = 0;

  *first0 = hunk->line0;
  *first1 = hunk->line1;

  next = hunk;
  do
    {
      /* Last line deleted from file 0 and last line inserted from
	 file 1 by this change.  */
      l0 = next->line0 + next->deleted - 1;
      l1 = next->line1 + next->inserted - 1;
      show_from += next->deleted;
      show_to += next->inserted;

      /* Check the deleted lines; stop as soon as one is not ignorable.  */
      for (i = next->line0; i <= l0 && trivial; i++)
	{
	  char const *line = linbuf0[i];
	  /* One byte before the start of the next line, i.e. the last
	     byte of this line (presumably its newline).  */
	  char const *newline = linbuf0[i + 1] - 1;
	  size_t len = newline - line;
	  char const *p = line;
	  if (skip_leading_white_space)
	    while (isspace ((unsigned char) *p) && *p != '\n')
	      p++;
	  if (newline - p != trivial_length
	      && (! ignore_regexp.fastmap
		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
	    trivial = 0;
	}

      /* Likewise for the inserted lines.  */
      for (i = next->line1; i <= l1 && trivial; i++)
	{
	  char const *line = linbuf1[i];
	  char const *newline = linbuf1[i + 1] - 1;
	  size_t len = newline - line;
	  char const *p = line;
	  if (skip_leading_white_space)
	    while (isspace ((unsigned char) *p) && *p != '\n')
	      p++;
	  if (newline - p != trivial_length
	      && (! ignore_regexp.fastmap
		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
	    trivial = 0;
	}
    }
  while ((next = next->link) != 0);

  /* L0 and L1 now describe the last change in the chain.  */
  *last0 = l0;
  *last1 = l1;

  /* If all inserted or deleted lines are ignorable,
     tell the caller to ignore this hunk.  */

  if (trivial)
    return UNCHANGED;

  /* NOTE(review): this relies on OLD | NEW being CHANGED and on
     UNCHANGED being 0 -- confirm against the definition of
     `enum changes'.  */
  return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
}
727 
/* Return a newly allocated string holding S1, S2 and S3 joined end to
   end.  The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t const len1 = strlen (s1);
  size_t const len2 = strlen (s2);
  size_t const len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);
  char *out = result;

  memcpy (out, s1, len1);
  out += len1;
  memcpy (out, s2, len2);
  out += len2;
  /* Copy S3 together with its terminating '\0'.  */
  memcpy (out, s3, len3 + 1);

  return result;
}
737 
/* Allocate SIZE bytes with xmalloc, set all of them to zero,
   and return the block.  */

void *
zalloc (size_t size)
{
  void *block = xmalloc (size);

  /* memset returns its first argument.  */
  return memset (block, 0, size);
}
747 
/* Return the newly allocated pathname of the file named FILE in the
   directory DIR.  A "/" is placed between the two unless the base name
   of DIR is empty or already ends in '/'.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last = base_name (dir);
  char const *separator = "/";

  if (*last == '\0' || last[strlen (last) - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
758 
759 void
760 debug_script (struct change *sp)
761 {
762   fflush (stdout);
763 
764   for (; sp; sp = sp->link)
765     {
766       long int line0 = sp->line0;
767       long int line1 = sp->line1;
768       long int deleted = sp->deleted;
769       long int inserted = sp->inserted;
770       fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
771 	       line0, line1, deleted, inserted);
772     }
773 
774   fflush (stderr);
775 }
776