xref: /freebsd/contrib/libdiff/lib/diff_output_unidiff.c (revision 1b1e392aed4957a38c49599512b4f65b844a0772)
1  /* Produce a unidiff output from a diff_result. */
2  /*
3   * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4   *
5   * Permission to use, copy, modify, and distribute this software for any
6   * purpose with or without fee is hereby granted, provided that the above
7   * copyright notice and this permission notice appear in all copies.
8   *
9   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16   */
17  
18  #include <errno.h>
19  #include <stdbool.h>
20  #include <stdint.h>
21  #include <stdio.h>
22  #include <stdlib.h>
23  #include <string.h>
24  #include <assert.h>
25  
26  #include <arraylist.h>
27  #include <diff_main.h>
28  #include <diff_output.h>
29  
30  #include "diff_internal.h"
31  #include "diff_debug.h"
32  
33  off_t
34  diff_chunk_get_left_start_pos(const struct diff_chunk *c)
35  {
36  	return c->left_start->pos;
37  }
38  
39  off_t
40  diff_chunk_get_right_start_pos(const struct diff_chunk *c)
41  {
42  	return c->right_start->pos;
43  }
44  
45  bool
46  diff_chunk_context_empty(const struct diff_chunk_context *cc)
47  {
48  	return diff_range_empty(&cc->chunk);
49  }
50  
51  int
52  diff_chunk_get_left_start(const struct diff_chunk *c,
53      const struct diff_result *r, int context_lines)
54  {
55  	int left_start = diff_atom_root_idx(r->left, c->left_start);
56  	return MAX(0, left_start - context_lines);
57  }
58  
59  int
60  diff_chunk_get_left_end(const struct diff_chunk *c,
61      const struct diff_result *r, int context_lines)
62  {
63  	int left_start = diff_chunk_get_left_start(c, r, 0);
64  	return MIN(r->left->atoms.len,
65  	    left_start + c->left_count + context_lines);
66  }
67  
68  int
69  diff_chunk_get_right_start(const struct diff_chunk *c,
70      const struct diff_result *r, int context_lines)
71  {
72  	int right_start = diff_atom_root_idx(r->right, c->right_start);
73  	return MAX(0, right_start - context_lines);
74  }
75  
76  int
77  diff_chunk_get_right_end(const struct diff_chunk *c,
78      const struct diff_result *r, int context_lines)
79  {
80  	int right_start = diff_chunk_get_right_start(c, r, 0);
81  	return MIN(r->right->atoms.len,
82  	    right_start + c->right_count + context_lines);
83  }
84  
85  struct diff_chunk *
86  diff_chunk_get(const struct diff_result *r, int chunk_idx)
87  {
88  	return &r->chunks.head[chunk_idx];
89  }
90  
91  int
92  diff_chunk_get_left_count(struct diff_chunk *c)
93  {
94  	return c->left_count;
95  }
96  
97  int
98  diff_chunk_get_right_count(struct diff_chunk *c)
99  {
100  	return c->right_count;
101  }
102  
103  void
104  diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
105  		  int chunk_idx, int context_lines)
106  {
107  	const struct diff_chunk *c = &r->chunks.head[chunk_idx];
108  	int left_start = diff_chunk_get_left_start(c, r, context_lines);
109  	int left_end = diff_chunk_get_left_end(c, r, context_lines);
110  	int right_start = diff_chunk_get_right_start(c, r, context_lines);
111  	int right_end = diff_chunk_get_right_end(c, r,  context_lines);
112  
113  	*cc = (struct diff_chunk_context){
114  		.chunk = {
115  			.start = chunk_idx,
116  			.end = chunk_idx + 1,
117  		},
118  		.left = {
119  			.start = left_start,
120  			.end = left_end,
121  		},
122  		.right = {
123  			.start = right_start,
124  			.end = right_end,
125  		},
126  	};
127  }
128  
129  bool
130  diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
131  			  const struct diff_chunk_context *other)
132  {
133  	return diff_ranges_touch(&cc->chunk, &other->chunk)
134  		|| diff_ranges_touch(&cc->left, &other->left)
135  		|| diff_ranges_touch(&cc->right, &other->right);
136  }
137  
138  void
139  diff_chunk_contexts_merge(struct diff_chunk_context *cc,
140  			  const struct diff_chunk_context *other)
141  {
142  	diff_ranges_merge(&cc->chunk, &other->chunk);
143  	diff_ranges_merge(&cc->left, &other->left);
144  	diff_ranges_merge(&cc->right, &other->right);
145  }
146  
147  void
148  diff_chunk_context_load_change(struct diff_chunk_context *cc,
149  			       int *nchunks_used,
150  			       struct diff_result *result,
151  			       int start_chunk_idx,
152  			       int context_lines)
153  {
154  	int i;
155  	int seen_minus = 0, seen_plus = 0;
156  
157  	if (nchunks_used)
158  		*nchunks_used = 0;
159  
160  	for (i = start_chunk_idx; i < result->chunks.len; i++) {
161  		struct diff_chunk *chunk = &result->chunks.head[i];
162  		enum diff_chunk_type t = diff_chunk_type(chunk);
163  		struct diff_chunk_context next;
164  
165  		if (t != CHUNK_MINUS && t != CHUNK_PLUS) {
166  			if (nchunks_used)
167  				(*nchunks_used)++;
168  			if (seen_minus || seen_plus)
169  				break;
170  			else
171  				continue;
172  		} else if (t == CHUNK_MINUS)
173  			seen_minus = 1;
174  		else if (t == CHUNK_PLUS)
175  			seen_plus = 1;
176  
177  		if (diff_chunk_context_empty(cc)) {
178  			/* Note down the start point, any number of subsequent
179  			 * chunks may be joined up to this chunk by being
180  			 * directly adjacent. */
181  			diff_chunk_context_get(cc, result, i, context_lines);
182  			if (nchunks_used)
183  				(*nchunks_used)++;
184  			continue;
185  		}
186  
187  		/* There already is a previous chunk noted down for being
188  		 * printed. Does it join up with this one? */
189  		diff_chunk_context_get(&next, result, i, context_lines);
190  
191  		if (diff_chunk_contexts_touch(cc, &next)) {
192  			/* This next context touches or overlaps the previous
193  			 * one, join. */
194  			diff_chunk_contexts_merge(cc, &next);
195  			if (nchunks_used)
196  				(*nchunks_used)++;
197  			continue;
198  		} else
199  			break;
200  	}
201  }
202  
203  struct diff_output_unidiff_state {
204  	bool header_printed;
205  	char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
206  	int last_prototype_idx;
207  };
208  
209  struct diff_output_unidiff_state *
210  diff_output_unidiff_state_alloc(void)
211  {
212  	struct diff_output_unidiff_state *state;
213  
214  	state = calloc(1, sizeof(struct diff_output_unidiff_state));
215  	if (state != NULL)
216  		diff_output_unidiff_state_reset(state);
217  	return state;
218  }
219  
220  void
221  diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
222  {
223  	state->header_printed = false;
224  	memset(state->prototype, 0, sizeof(state->prototype));
225  	state->last_prototype_idx = 0;
226  }
227  
228  void
229  diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
230  {
231  	free(state);
232  }
233  
234  static int
235  output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
236  		     struct diff_output_unidiff_state *state,
237  		     const struct diff_input_info *info,
238  		     const struct diff_result *result,
239  		     bool print_header, bool show_function_prototypes,
240  		     const struct diff_chunk_context *cc)
241  {
242  	int rc, left_start, left_len, right_start, right_len;
243  	off_t outoff = 0, *offp;
244  	uint8_t *typep;
245  
246  	if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
247  		return DIFF_RC_OK;
248  
249  	if (outinfo && outinfo->line_offsets.len > 0) {
250  		unsigned int idx = outinfo->line_offsets.len - 1;
251  		outoff = outinfo->line_offsets.head[idx];
252  	}
253  
254  	if (print_header && !(state->header_printed)) {
255  		rc = fprintf(dest, "--- %s\n",
256  		    diff_output_get_label_left(info));
257  		if (rc < 0)
258  			return errno;
259  		if (outinfo) {
260  			ARRAYLIST_ADD(offp, outinfo->line_offsets);
261  			if (offp == NULL)
262  				return ENOMEM;
263  			outoff += rc;
264  			*offp = outoff;
265  			ARRAYLIST_ADD(typep, outinfo->line_types);
266  			if (typep == NULL)
267  				return ENOMEM;
268  			*typep = DIFF_LINE_MINUS;
269  		}
270  		rc = fprintf(dest, "+++ %s\n",
271  		    diff_output_get_label_right(info));
272  		if (rc < 0)
273  			return errno;
274  		if (outinfo) {
275  			ARRAYLIST_ADD(offp, outinfo->line_offsets);
276  			if (offp == NULL)
277  				return ENOMEM;
278  			outoff += rc;
279  			*offp = outoff;
280  			ARRAYLIST_ADD(typep, outinfo->line_types);
281  			if (typep == NULL)
282  				return ENOMEM;
283  			*typep = DIFF_LINE_PLUS;
284  		}
285  		state->header_printed = true;
286  	}
287  
288  	left_len = cc->left.end - cc->left.start;
289  	if (result->left->atoms.len == 0)
290  		left_start = 0;
291  	else if (left_len == 0 && cc->left.start > 0)
292  		left_start = cc->left.start;
293  	else
294  		left_start = cc->left.start + 1;
295  
296  	right_len = cc->right.end - cc->right.start;
297  	if (result->right->atoms.len == 0)
298  		right_start = 0;
299  	else if (right_len == 0 && cc->right.start > 0)
300  		right_start = cc->right.start;
301  	else
302  		right_start = cc->right.start + 1;
303  
304  	/* Got the absolute line numbers where to start printing, and the index
305  	 * of the interesting (non-context) chunk.
306  	 * To print context lines above the interesting chunk, nipping on the
307  	 * previous chunk index may be necessary.
308  	 * It is guaranteed to be only context lines where left == right, so it
309  	 * suffices to look on the left. */
310  	const struct diff_chunk *first_chunk;
311  	int chunk_start_line;
312  	first_chunk = &result->chunks.head[cc->chunk.start];
313  	chunk_start_line = diff_atom_root_idx(result->left,
314  					      first_chunk->left_start);
315  	if (show_function_prototypes) {
316  		rc = diff_output_match_function_prototype(state->prototype,
317  		    sizeof(state->prototype), &state->last_prototype_idx,
318  		    result, chunk_start_line);
319  		if (rc)
320  			return rc;
321  	}
322  
323  	if (left_len == 1 && right_len == 1) {
324  		rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
325  			left_start, right_start,
326  			state->prototype[0] ? " " : "",
327  			state->prototype[0] ? state->prototype : "");
328  	} else if (left_len == 1 && right_len != 1) {
329  		rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
330  			left_start, right_start, right_len,
331  			state->prototype[0] ? " " : "",
332  			state->prototype[0] ? state->prototype : "");
333  	} else if (left_len != 1 && right_len == 1) {
334  		rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
335  			left_start, left_len, right_start,
336  			state->prototype[0] ? " " : "",
337  			state->prototype[0] ? state->prototype : "");
338  	} else {
339  		rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
340  			left_start, left_len, right_start, right_len,
341  			state->prototype[0] ? " " : "",
342  			state->prototype[0] ? state->prototype : "");
343  	}
344  	if (rc < 0)
345  		return errno;
346  	if (outinfo) {
347  		ARRAYLIST_ADD(offp, outinfo->line_offsets);
348  		if (offp == NULL)
349  			return ENOMEM;
350  		outoff += rc;
351  		*offp = outoff;
352  		ARRAYLIST_ADD(typep, outinfo->line_types);
353  		if (typep == NULL)
354  			return ENOMEM;
355  		*typep = DIFF_LINE_HUNK;
356  	}
357  
358  	if (cc->left.start < chunk_start_line) {
359  		rc = diff_output_lines(outinfo, dest, " ",
360  				  &result->left->atoms.head[cc->left.start],
361  				  chunk_start_line - cc->left.start);
362  		if (rc)
363  			return rc;
364  	}
365  
366  	/* Now write out all the joined chunks and contexts between them */
367  	int c_idx;
368  	for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
369  		const struct diff_chunk *c = &result->chunks.head[c_idx];
370  
371  		if (c->left_count && c->right_count)
372  			rc = diff_output_lines(outinfo, dest,
373  					  c->solved ? " " : "?",
374  					  c->left_start, c->left_count);
375  		else if (c->left_count && !c->right_count)
376  			rc = diff_output_lines(outinfo, dest,
377  					  c->solved ? "-" : "?",
378  					  c->left_start, c->left_count);
379  		else if (c->right_count && !c->left_count)
380  			rc = diff_output_lines(outinfo, dest,
381  					  c->solved ? "+" : "?",
382  					  c->right_start, c->right_count);
383  		if (rc)
384  			return rc;
385  
386  		if (cc->chunk.end == result->chunks.len) {
387  			rc = diff_output_trailing_newline_msg(outinfo, dest, c);
388  			if (rc != DIFF_RC_OK)
389  				return rc;
390  		}
391  	}
392  
393  	/* Trailing context? */
394  	const struct diff_chunk *last_chunk;
395  	int chunk_end_line;
396  	last_chunk = &result->chunks.head[cc->chunk.end - 1];
397  	chunk_end_line = diff_atom_root_idx(result->left,
398  					    last_chunk->left_start
399  					    + last_chunk->left_count);
400  	if (cc->left.end > chunk_end_line) {
401  		rc = diff_output_lines(outinfo, dest, " ",
402  				  &result->left->atoms.head[chunk_end_line],
403  				  cc->left.end - chunk_end_line);
404  		if (rc)
405  			return rc;
406  
407  		if (cc->left.end == result->left->atoms.len) {
408  			rc = diff_output_trailing_newline_msg(outinfo, dest,
409  			    &result->chunks.head[result->chunks.len - 1]);
410  			if (rc != DIFF_RC_OK)
411  				return rc;
412  		}
413  	}
414  
415  	return DIFF_RC_OK;
416  }
417  
418  int
419  diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
420  			  struct diff_output_unidiff_state *state,
421  			  const struct diff_input_info *info,
422  			  const struct diff_result *result,
423  			  const struct diff_chunk_context *cc)
424  {
425  	struct diff_output_info *outinfo = NULL;
426  	int flags = (result->left->root->diff_flags |
427  	    result->right->root->diff_flags);
428  	bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
429  
430  	if (output_info) {
431  		*output_info = diff_output_info_alloc();
432  		if (*output_info == NULL)
433  			return ENOMEM;
434  		outinfo = *output_info;
435  	}
436  
437  	return output_unidiff_chunk(outinfo, dest, state, info,
438  	    result, false, show_function_prototypes, cc);
439  }
440  
441  int
442  diff_output_unidiff(struct diff_output_info **output_info,
443  		    FILE *dest, const struct diff_input_info *info,
444  		    const struct diff_result *result,
445  		    unsigned int context_lines)
446  {
447  	struct diff_output_unidiff_state *state;
448  	struct diff_chunk_context cc = {};
449  	struct diff_output_info *outinfo = NULL;
450  	int atomizer_flags = (result->left->atomizer_flags|
451  	    result->right->atomizer_flags);
452  	int flags = (result->left->root->diff_flags |
453  	    result->right->root->diff_flags);
454  	bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
455  	bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
456  	bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
457  	off_t outoff = 0, *offp;
458  	uint8_t *typep;
459  	int rc, i;
460  
461  	if (!result)
462  		return EINVAL;
463  	if (result->rc != DIFF_RC_OK)
464  		return result->rc;
465  
466  	if (output_info) {
467  		*output_info = diff_output_info_alloc();
468  		if (*output_info == NULL)
469  			return ENOMEM;
470  		outinfo = *output_info;
471  	}
472  
473  	if (have_binary && !force_text) {
474  		for (i = 0; i < result->chunks.len; i++) {
475  			struct diff_chunk *c = &result->chunks.head[i];
476  			enum diff_chunk_type t = diff_chunk_type(c);
477  
478  			if (t != CHUNK_MINUS && t != CHUNK_PLUS)
479  				continue;
480  
481  			if (outinfo && outinfo->line_offsets.len > 0) {
482  				unsigned int idx =
483  				    outinfo->line_offsets.len - 1;
484  				outoff = outinfo->line_offsets.head[idx];
485  			}
486  
487  			rc = fprintf(dest, "Binary files %s and %s differ\n",
488  			    diff_output_get_label_left(info),
489  			    diff_output_get_label_right(info));
490  			if (outinfo) {
491  				ARRAYLIST_ADD(offp, outinfo->line_offsets);
492  				if (offp == NULL)
493  					return ENOMEM;
494  				outoff += rc;
495  				*offp = outoff;
496  				ARRAYLIST_ADD(typep, outinfo->line_types);
497  				if (typep == NULL)
498  					return ENOMEM;
499  				*typep = DIFF_LINE_NONE;
500  			}
501  			break;
502  		}
503  
504  		return DIFF_RC_OK;
505  	}
506  
507  	state = diff_output_unidiff_state_alloc();
508  	if (state == NULL) {
509  		if (output_info) {
510  			diff_output_info_free(*output_info);
511  			*output_info = NULL;
512  		}
513  		return ENOMEM;
514  	}
515  
516  #if DEBUG
517  	unsigned int check_left_pos, check_right_pos;
518  	check_left_pos = 0;
519  	check_right_pos = 0;
520  	for (i = 0; i < result->chunks.len; i++) {
521  		struct diff_chunk *c = &result->chunks.head[i];
522  		enum diff_chunk_type t = diff_chunk_type(c);
523  
524  		debug("[%d] %s lines L%d R%d @L %d @R %d\n",
525  		      i, (t == CHUNK_MINUS ? "minus" :
526  			  (t == CHUNK_PLUS ? "plus" :
527  			   (t == CHUNK_SAME ? "same" : "?"))),
528  		      c->left_count,
529  		      c->right_count,
530  		      c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
531  		      c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
532  		assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
533  		assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
534  		check_left_pos += c->left_count;
535  		check_right_pos += c->right_count;
536  
537  	}
538  	assert(check_left_pos == result->left->atoms.len);
539  	assert(check_right_pos == result->right->atoms.len);
540  #endif
541  
542  	for (i = 0; i < result->chunks.len; i++) {
543  		struct diff_chunk *c = &result->chunks.head[i];
544  		enum diff_chunk_type t = diff_chunk_type(c);
545  		struct diff_chunk_context next;
546  
547  		if (t != CHUNK_MINUS && t != CHUNK_PLUS)
548  			continue;
549  
550  		if (diff_chunk_context_empty(&cc)) {
551  			/* These are the first lines being printed.
552  			 * Note down the start point, any number of subsequent
553  			 * chunks may be joined up to this unidiff chunk by
554  			 * context lines or by being directly adjacent. */
555  			diff_chunk_context_get(&cc, result, i, context_lines);
556  			debug("new chunk to be printed:"
557  			      " chunk %d-%d left %d-%d right %d-%d\n",
558  			      cc.chunk.start, cc.chunk.end,
559  			      cc.left.start, cc.left.end,
560  			      cc.right.start, cc.right.end);
561  			continue;
562  		}
563  
564  		/* There already is a previous chunk noted down for being
565  		 * printed. Does it join up with this one? */
566  		diff_chunk_context_get(&next, result, i, context_lines);
567  		debug("new chunk to be printed:"
568  		      " chunk %d-%d left %d-%d right %d-%d\n",
569  		      next.chunk.start, next.chunk.end,
570  		      next.left.start, next.left.end,
571  		      next.right.start, next.right.end);
572  
573  		if (diff_chunk_contexts_touch(&cc, &next)) {
574  			/* This next context touches or overlaps the previous
575  			 * one, join. */
576  			diff_chunk_contexts_merge(&cc, &next);
577  			debug("new chunk to be printed touches previous chunk,"
578  			      " now: left %d-%d right %d-%d\n",
579  			      cc.left.start, cc.left.end,
580  			      cc.right.start, cc.right.end);
581  			continue;
582  		}
583  
584  		/* No touching, so the previous context is complete with a gap
585  		 * between it and this next one. Print the previous one and
586  		 * start fresh here. */
587  		debug("new chunk to be printed does not touch previous chunk;"
588  		      " print left %d-%d right %d-%d\n",
589  		      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
590  		output_unidiff_chunk(outinfo, dest, state, info, result,
591  		    true, show_function_prototypes, &cc);
592  		cc = next;
593  		debug("new unprinted chunk is left %d-%d right %d-%d\n",
594  		      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
595  	}
596  
597  	if (!diff_chunk_context_empty(&cc))
598  		output_unidiff_chunk(outinfo, dest, state, info, result,
599  		    true, show_function_prototypes, &cc);
600  	diff_output_unidiff_state_free(state);
601  	return DIFF_RC_OK;
602  }
603