1 /* Support routines for GNU DIFF.
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4 2004 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 #include "diff.h"
24 #include <dirname.h>
25 #include <error.h>
26 #include <quotesys.h>
27 #include <xalloc.h>
28
29 char const pr_program[] = PR_PROGRAM;
30
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.

   A record is allocated with room for its strings directly after the
   header: offsetof (struct msg, args) bytes plus the total length of
   the strings stored in ARGS.  */

struct msg
{
  /* Next queued message, or null if this is the last one.  */
  struct msg *next;

  /* Format + 4 args, each '\0' terminated, concatenated.  Declared as a
     flexible array member so that storing more than one byte here is
     well defined.  */
  char args[];
};
39
/* Head of the chain of queued messages; initially null.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the location where the next
   message is to be linked in.  Initially this is the address of
   MSG_CHAIN itself.  */

static struct msg **msg_chain_end = &msg_chain;
47
/* Report a failed system call without exiting.
   NAME, normally the name of the file involved, is handed to `error'
   together with the current value of errno and an exit status of 0.  */

void
perror_with_name (char const *name)
{
  int const code = errno;

  error (0, code, "%s", name);
}
56
57 /* Use when a system call returns non-zero status and that is fatal. */
58
59 void
pfatal_with_name(char const * name)60 pfatal_with_name (char const *name)
61 {
62 int e = errno;
63 print_message_queue ();
64 error (EXIT_TROUBLE, e, "%s", name);
65 abort ();
66 }
67
68 /* Print an error message containing MSGID, then exit. */
69
70 void
fatal(char const * msgid)71 fatal (char const *msgid)
72 {
73 print_message_queue ();
74 error (EXIT_TROUBLE, 0, "%s", _(msgid));
75 abort ();
76 }
77
/* Like printf with two string arguments, except that if -l is in effect
   the message is saved and printed later.
   This is used for things like "Only in ...".
   It is message5 with the third and fourth arguments null.  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const no_arg = 0;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
86
87 void
message5(char const * format_msgid,char const * arg1,char const * arg2,char const * arg3,char const * arg4)88 message5 (char const *format_msgid, char const *arg1, char const *arg2,
89 char const *arg3, char const *arg4)
90 {
91 if (paginate)
92 {
93 char *p;
94 char const *arg[5];
95 int i;
96 size_t size[5];
97 size_t total_size = offsetof (struct msg, args);
98 struct msg *new;
99
100 arg[0] = format_msgid;
101 arg[1] = arg1;
102 arg[2] = arg2;
103 arg[3] = arg3 ? arg3 : "";
104 arg[4] = arg4 ? arg4 : "";
105
106 for (i = 0; i < 5; i++)
107 total_size += size[i] = strlen (arg[i]) + 1;
108
109 new = xmalloc (total_size);
110
111 for (i = 0, p = new->args; i < 5; p += size[i++])
112 memcpy (p, arg[i], size[i]);
113
114 *msg_chain_end = new;
115 new->next = 0;
116 msg_chain_end = &new->next;
117 }
118 else
119 {
120 if (sdiff_merge_assist)
121 putchar (' ');
122 printf (_(format_msgid), arg1, arg2, arg3, arg4);
123 }
124 }
125
126 /* Output all the messages that were saved up by calls to `message'. */
127
128 void
print_message_queue(void)129 print_message_queue (void)
130 {
131 char const *arg[5];
132 int i;
133 struct msg *m = msg_chain;
134
135 while (m)
136 {
137 struct msg *next = m->next;
138 arg[0] = m->args;
139 for (i = 0; i < 4; i++)
140 arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
141 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
142 free (m);
143 m = next;
144 }
145 }
146
147 /* Call before outputting the results of comparing files NAME0 and NAME1
148 to set up OUTFILE, the stdio stream for the output to go to.
149
150 Usually, OUTFILE is just stdout. But when -l was specified
151 we fork off a `pr' and make OUTFILE a pipe to it.
152 `pr' then outputs to our stdout. */
153
154 static char const *current_name0;
155 static char const *current_name1;
156 static bool currently_recursive;
157
158 void
setup_output(char const * name0,char const * name1,bool recursive)159 setup_output (char const *name0, char const *name1, bool recursive)
160 {
161 current_name0 = name0;
162 current_name1 = name1;
163 currently_recursive = recursive;
164 outfile = 0;
165 }
166
167 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
168 static pid_t pr_pid;
169 #endif
170
171 void
begin_output(void)172 begin_output (void)
173 {
174 char *name;
175
176 if (outfile != 0)
177 return;
178
179 /* Construct the header of this piece of diff. */
180 name = xmalloc (strlen (current_name0) + strlen (current_name1)
181 + strlen (switch_string) + 7);
182
183 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
184 the standard: it says that we must print only the last component
185 of the pathnames, and it requires two spaces after "diff" if
186 there are no options. These requirements are silly and do not
187 match historical practice. */
188 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
189
190 if (paginate)
191 {
192 if (fflush (stdout) != 0)
193 pfatal_with_name (_("write failed"));
194
195 /* Make OUTFILE a pipe to a subsidiary `pr'. */
196 {
197 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
198 int pipes[2];
199
200 if (pipe (pipes) != 0)
201 pfatal_with_name ("pipe");
202
203 pr_pid = vfork ();
204 if (pr_pid < 0)
205 pfatal_with_name ("fork");
206
207 if (pr_pid == 0)
208 {
209 close (pipes[1]);
210 if (pipes[0] != STDIN_FILENO)
211 {
212 if (dup2 (pipes[0], STDIN_FILENO) < 0)
213 pfatal_with_name ("dup2");
214 close (pipes[0]);
215 }
216
217 execl (pr_program, pr_program, "-h", name, (char *) 0);
218 _exit (errno == ENOENT ? 127 : 126);
219 }
220 else
221 {
222 close (pipes[0]);
223 outfile = fdopen (pipes[1], "w");
224 if (!outfile)
225 pfatal_with_name ("fdopen");
226 }
227 #else
228 char *command = xmalloc (sizeof pr_program - 1 + 7
229 + quote_system_arg ((char *) 0, name) + 1);
230 char *p;
231 sprintf (command, "%s -f -h ", pr_program);
232 p = command + sizeof pr_program - 1 + 7;
233 p += quote_system_arg (p, name);
234 *p = 0;
235 errno = 0;
236 outfile = popen (command, "w");
237 if (!outfile)
238 pfatal_with_name (command);
239 free (command);
240 #endif
241 }
242 }
243 else
244 {
245
246 /* If -l was not specified, output the diff straight to `stdout'. */
247
248 outfile = stdout;
249
250 /* If handling multiple files (because scanning a directory),
251 print which files the following output is about. */
252 if (currently_recursive)
253 printf ("%s\n", name);
254 }
255
256 free (name);
257
258 /* A special header is needed at the beginning of context output. */
259 switch (output_style)
260 {
261 case OUTPUT_CONTEXT:
262 print_context_header (files, false);
263 break;
264
265 case OUTPUT_UNIFIED:
266 print_context_header (files, true);
267 break;
268
269 default:
270 break;
271 }
272 }
273
274 /* Call after the end of output of diffs for one file.
275 Close OUTFILE and get rid of the `pr' subfork. */
276
277 void
finish_output(void)278 finish_output (void)
279 {
280 if (outfile != 0 && outfile != stdout)
281 {
282 int status;
283 int wstatus;
284 int werrno = 0;
285 if (ferror (outfile))
286 fatal ("write failed");
287 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
288 wstatus = pclose (outfile);
289 if (wstatus == -1)
290 werrno = errno;
291 #else
292 if (fclose (outfile) != 0)
293 pfatal_with_name (_("write failed"));
294 if (waitpid (pr_pid, &wstatus, 0) < 0)
295 pfatal_with_name ("waitpid");
296 #endif
297 status = (! werrno && WIFEXITED (wstatus)
298 ? WEXITSTATUS (wstatus)
299 : INT_MAX);
300 if (status)
301 error (EXIT_TROUBLE, werrno,
302 _(status == 126
303 ? "subsidiary program `%s' could not be invoked"
304 : status == 127
305 ? "subsidiary program `%s' not found"
306 : status == INT_MAX
307 ? "subsidiary program `%s' failed"
308 : "subsidiary program `%s' failed (exit status %d)"),
309 pr_program, status);
310 }
311
312 outfile = 0;
313 }
314
315 /* Compare two lines (typically one from each input file)
316 according to the command line options.
317 For efficiency, this is invoked only when the lines do not match exactly
318 but an option like -i might cause us to ignore the difference.
319 Return nonzero if the lines differ. */
320
321 bool
lines_differ(char const * s1,char const * s2)322 lines_differ (char const *s1, char const *s2)
323 {
324 register char const *t1 = s1;
325 register char const *t2 = s2;
326 size_t column = 0;
327
328 while (1)
329 {
330 register unsigned char c1 = *t1++;
331 register unsigned char c2 = *t2++;
332
333 /* Test for exact char equality first, since it's a common case. */
334 if (c1 != c2)
335 {
336 switch (ignore_white_space)
337 {
338 case IGNORE_ALL_SPACE:
339 /* For -w, just skip past any white space. */
340 while (isspace (c1) && c1 != '\n') c1 = *t1++;
341 while (isspace (c2) && c2 != '\n') c2 = *t2++;
342 break;
343
344 case IGNORE_SPACE_CHANGE:
345 /* For -b, advance past any sequence of white space in
346 line 1 and consider it just one space, or nothing at
347 all if it is at the end of the line. */
348 if (isspace (c1))
349 {
350 while (c1 != '\n')
351 {
352 c1 = *t1++;
353 if (! isspace (c1))
354 {
355 --t1;
356 c1 = ' ';
357 break;
358 }
359 }
360 }
361
362 /* Likewise for line 2. */
363 if (isspace (c2))
364 {
365 while (c2 != '\n')
366 {
367 c2 = *t2++;
368 if (! isspace (c2))
369 {
370 --t2;
371 c2 = ' ';
372 break;
373 }
374 }
375 }
376
377 if (c1 != c2)
378 {
379 /* If we went too far when doing the simple test
380 for equality, go back to the first non-white-space
381 character in both sides and try again. */
382 if (c2 == ' ' && c1 != '\n'
383 && s1 + 1 < t1
384 && isspace ((unsigned char) t1[-2]))
385 {
386 --t1;
387 continue;
388 }
389 if (c1 == ' ' && c2 != '\n'
390 && s2 + 1 < t2
391 && isspace ((unsigned char) t2[-2]))
392 {
393 --t2;
394 continue;
395 }
396 }
397
398 break;
399
400 case IGNORE_TAB_EXPANSION:
401 if ((c1 == ' ' && c2 == '\t')
402 || (c1 == '\t' && c2 == ' '))
403 {
404 size_t column2 = column;
405 for (;; c1 = *t1++)
406 {
407 if (c1 == ' ')
408 column++;
409 else if (c1 == '\t')
410 column += tabsize - column % tabsize;
411 else
412 break;
413 }
414 for (;; c2 = *t2++)
415 {
416 if (c2 == ' ')
417 column2++;
418 else if (c2 == '\t')
419 column2 += tabsize - column2 % tabsize;
420 else
421 break;
422 }
423 if (column != column2)
424 return true;
425 }
426 break;
427
428 case IGNORE_NO_WHITE_SPACE:
429 break;
430 }
431
432 /* Lowercase all letters if -i is specified. */
433
434 if (ignore_case)
435 {
436 c1 = tolower (c1);
437 c2 = tolower (c2);
438 }
439
440 if (c1 != c2)
441 break;
442 }
443 if (c1 == '\n')
444 return false;
445
446 column += c1 == '\t' ? tabsize - column % tabsize : 1;
447 }
448
449 return true;
450 }
451
/* Find the consecutive changes at the start of the script START.
   Return the last link before the first gap.

   This implementation returns START itself, so when it is used as the
   HUNKFUN of print_script every change is printed as a piece of its
   own.  */

struct change *
find_change (struct change *start)
{
  struct change *const last_in_hunk = start;

  return last_in_hunk;
}
460
/* Hunk-finding function with the same interface as find_change.
   Like it, this simply returns START.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *const last_in_hunk = start;

  return last_in_hunk;
}
466
467 /* Divide SCRIPT into pieces by calling HUNKFUN and
468 print each piece with PRINTFUN.
469 Both functions take one arg, an edit script.
470
471 HUNKFUN is called with the tail of the script
472 and returns the last link that belongs together with the start
473 of the tail.
474
475 PRINTFUN takes a subscript which belongs together (with a null
476 link at the end) and prints it. */
477
478 void
print_script(struct change * script,struct change * (* hunkfun)(struct change *),void (* printfun)(struct change *))479 print_script (struct change *script,
480 struct change * (*hunkfun) (struct change *),
481 void (*printfun) (struct change *))
482 {
483 struct change *next = script;
484
485 while (next)
486 {
487 struct change *this, *end;
488
489 /* Find a set of changes that belong together. */
490 this = next;
491 end = (*hunkfun) (next);
492
493 /* Disconnect them from the rest of the changes,
494 making them a hunk, and remember the rest for next iteration. */
495 next = end->link;
496 end->link = 0;
497 #ifdef DEBUG
498 debug_script (this);
499 #endif
500
501 /* Print this hunk. */
502 (*printfun) (this);
503
504 /* Reconnect the script so it will all be freed properly. */
505 end->link = next;
506 }
507 }
508
509 /* Print the text of a single line LINE,
510 flagging it with the characters in LINE_FLAG (which say whether
511 the line is inserted, deleted, changed, etc.). */
512
513 void
print_1_line(char const * line_flag,char const * const * line)514 print_1_line (char const *line_flag, char const *const *line)
515 {
516 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
517 FILE *out = outfile; /* Help the compiler some more. */
518 char const *flag_format = 0;
519
520 /* If -T was specified, use a Tab between the line-flag and the text.
521 Otherwise use a Space (as Unix diff does).
522 Print neither space nor tab if line-flags are empty. */
523
524 if (line_flag && *line_flag)
525 {
526 flag_format = initial_tab ? "%s\t" : "%s ";
527 fprintf (out, flag_format, line_flag);
528 }
529
530 output_1_line (base, limit, flag_format, line_flag);
531
532 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
533 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
534 }
535
536 /* Output a line from BASE up to LIMIT.
537 With -t, expand white space characters to spaces, and if FLAG_FORMAT
538 is nonzero, output it with argument LINE_FLAG after every
539 internal carriage return, so that tab stops continue to line up. */
540
541 void
output_1_line(char const * base,char const * limit,char const * flag_format,char const * line_flag)542 output_1_line (char const *base, char const *limit, char const *flag_format,
543 char const *line_flag)
544 {
545 if (!expand_tabs)
546 fwrite (base, sizeof (char), limit - base, outfile);
547 else
548 {
549 register FILE *out = outfile;
550 register unsigned char c;
551 register char const *t = base;
552 register size_t column = 0;
553 size_t tab_size = tabsize;
554
555 while (t < limit)
556 switch ((c = *t++))
557 {
558 case '\t':
559 {
560 size_t spaces = tab_size - column % tab_size;
561 column += spaces;
562 do
563 putc (' ', out);
564 while (--spaces);
565 }
566 break;
567
568 case '\r':
569 putc (c, out);
570 if (flag_format && t < limit && *t != '\n')
571 fprintf (out, flag_format, line_flag);
572 column = 0;
573 break;
574
575 case '\b':
576 if (column == 0)
577 continue;
578 column--;
579 putc (c, out);
580 break;
581
582 default:
583 column += isprint (c) != 0;
584 putc (c, out);
585 break;
586 }
587 }
588 }
589
/* Letters 'd', 'a' and 'c' at indexes 1, 2 and 3; index 0 holds '\0'.
   NOTE(review): presumably indexed by the `enum changes' value that
   analyze_hunk returns -- confirm against the users of this table.  */
char const change_letter[] = { '\0', 'd', 'a', 'c' };
591
592 /* Translate an internal line number (an index into diff's table of lines)
593 into an actual line number in the input file.
594 The internal line number is I. FILE points to the data on the file.
595
596 Internal line numbers count from 0 starting after the prefix.
597 Actual line numbers count from 1 within the entire file. */
598
599 lin
translate_line_number(struct file_data const * file,lin i)600 translate_line_number (struct file_data const *file, lin i)
601 {
602 return i + file->prefix_lines + 1;
603 }
604
605 /* Translate a line number range. This is always done for printing,
606 so for convenience translate to long int rather than lin, so that the
607 caller can use printf with "%ld" without casting. */
608
609 void
translate_range(struct file_data const * file,lin a,lin b,long int * aptr,long int * bptr)610 translate_range (struct file_data const *file,
611 lin a, lin b,
612 long int *aptr, long int *bptr)
613 {
614 *aptr = translate_line_number (file, a - 1) + 1;
615 *bptr = translate_line_number (file, b + 1) - 1;
616 }
617
618 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
619 If the two numbers are identical, print just one number.
620
621 Args A and B are internal line numbers.
622 We print the translated (real) line numbers. */
623
624 void
print_number_range(char sepchar,struct file_data * file,lin a,lin b)625 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
626 {
627 long int trans_a, trans_b;
628 translate_range (file, a, b, &trans_a, &trans_b);
629
630 /* Note: we can have B < A in the case of a range of no lines.
631 In this case, we should print the line number before the range,
632 which is B. */
633 if (trans_b > trans_a)
634 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
635 else
636 fprintf (outfile, "%ld", trans_b);
637 }
638
639 /* Look at a hunk of edit script and report the range of lines in each file
640 that it applies to. HUNK is the start of the hunk, which is a chain
641 of `struct change'. The first and last line numbers of file 0 are stored in
642 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
643 Note that these are internal line numbers that count from 0.
644
645 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
646
647 Return UNCHANGED if only ignorable lines are inserted or deleted,
648 OLD if lines of file 0 are deleted,
649 NEW if lines of file 1 are inserted,
650 and CHANGED if both kinds of changes are found. */
651
652 enum changes
analyze_hunk(struct change * hunk,lin * first0,lin * last0,lin * first1,lin * last1)653 analyze_hunk (struct change *hunk,
654 lin *first0, lin *last0,
655 lin *first1, lin *last1)
656 {
657 struct change *next;
658 lin l0, l1;
659 lin show_from, show_to;
660 lin i;
661 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
662 size_t trivial_length = ignore_blank_lines - 1;
663 /* If 0, ignore zero-length lines;
664 if SIZE_MAX, do not ignore lines just because of their length. */
665 bool skip_leading_white_space =
666 (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
667
668 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
669 char const * const *linbuf1 = files[1].linbuf;
670
671 show_from = show_to = 0;
672
673 *first0 = hunk->line0;
674 *first1 = hunk->line1;
675
676 next = hunk;
677 do
678 {
679 l0 = next->line0 + next->deleted - 1;
680 l1 = next->line1 + next->inserted - 1;
681 show_from += next->deleted;
682 show_to += next->inserted;
683
684 for (i = next->line0; i <= l0 && trivial; i++)
685 {
686 char const *line = linbuf0[i];
687 char const *newline = linbuf0[i + 1] - 1;
688 size_t len = newline - line;
689 char const *p = line;
690 if (skip_leading_white_space)
691 while (isspace ((unsigned char) *p) && *p != '\n')
692 p++;
693 if (newline - p != trivial_length
694 && (! ignore_regexp.fastmap
695 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
696 trivial = 0;
697 }
698
699 for (i = next->line1; i <= l1 && trivial; i++)
700 {
701 char const *line = linbuf1[i];
702 char const *newline = linbuf1[i + 1] - 1;
703 size_t len = newline - line;
704 char const *p = line;
705 if (skip_leading_white_space)
706 while (isspace ((unsigned char) *p) && *p != '\n')
707 p++;
708 if (newline - p != trivial_length
709 && (! ignore_regexp.fastmap
710 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
711 trivial = 0;
712 }
713 }
714 while ((next = next->link) != 0);
715
716 *last0 = l0;
717 *last1 = l1;
718
719 /* If all inserted or deleted lines are ignorable,
720 tell the caller to ignore this hunk. */
721
722 if (trivial)
723 return UNCHANGED;
724
725 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
726 }
727
/* Concatenate the three strings S1, S2 and S3, returning the result as
   a newly allocated string obtained from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t const len1 = strlen (s1);
  size_t const len2 = strlen (s2);
  size_t const len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);

  /* The last copy also takes the terminating '\0' of S3.  */
  memcpy (result + len1 + len2, s3, len3 + 1);

  return result;
}
737
/* Yield a new block of SIZE bytes obtained from xmalloc, with every
   byte set to zero.  */

void *
zalloc (size_t size)
{
  /* memset returns its first argument, i.e. the new block.  */
  return memset (xmalloc (size), 0, size);
}
747
/* Yield the newly allocated pathname of the file in DIR whose
   filename is FILE: DIR, a slash, and FILE.  The slash is left out when
   the base name of DIR is empty or already ends in a slash.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last = base_name (dir);
  size_t const last_len = strlen (last);
  char const *separator = "/";

  if (last_len == 0 || last[last_len - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
758
759 void
debug_script(struct change * sp)760 debug_script (struct change *sp)
761 {
762 fflush (stdout);
763
764 for (; sp; sp = sp->link)
765 {
766 long int line0 = sp->line0;
767 long int line1 = sp->line1;
768 long int deleted = sp->deleted;
769 long int inserted = sp->inserted;
770 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
771 line0, line1, deleted, inserted);
772 }
773
774 fflush (stderr);
775 }
776