1 /* Produce a unidiff output from a diff_result. */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <errno.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <assert.h>
25
26 #include <arraylist.h>
27 #include <diff_main.h>
28 #include <diff_output.h>
29
30 #include "diff_internal.h"
31 #include "diff_debug.h"
32
33 off_t
diff_chunk_get_left_start_pos(const struct diff_chunk * c)34 diff_chunk_get_left_start_pos(const struct diff_chunk *c)
35 {
36 return c->left_start->pos;
37 }
38
39 off_t
diff_chunk_get_right_start_pos(const struct diff_chunk * c)40 diff_chunk_get_right_start_pos(const struct diff_chunk *c)
41 {
42 return c->right_start->pos;
43 }
44
45 bool
diff_chunk_context_empty(const struct diff_chunk_context * cc)46 diff_chunk_context_empty(const struct diff_chunk_context *cc)
47 {
48 return diff_range_empty(&cc->chunk);
49 }
50
51 int
diff_chunk_get_left_start(const struct diff_chunk * c,const struct diff_result * r,int context_lines)52 diff_chunk_get_left_start(const struct diff_chunk *c,
53 const struct diff_result *r, int context_lines)
54 {
55 int left_start = diff_atom_root_idx(r->left, c->left_start);
56 return MAX(0, left_start - context_lines);
57 }
58
59 int
diff_chunk_get_left_end(const struct diff_chunk * c,const struct diff_result * r,int context_lines)60 diff_chunk_get_left_end(const struct diff_chunk *c,
61 const struct diff_result *r, int context_lines)
62 {
63 int left_start = diff_chunk_get_left_start(c, r, 0);
64 return MIN(r->left->atoms.len,
65 left_start + c->left_count + context_lines);
66 }
67
68 int
diff_chunk_get_right_start(const struct diff_chunk * c,const struct diff_result * r,int context_lines)69 diff_chunk_get_right_start(const struct diff_chunk *c,
70 const struct diff_result *r, int context_lines)
71 {
72 int right_start = diff_atom_root_idx(r->right, c->right_start);
73 return MAX(0, right_start - context_lines);
74 }
75
76 int
diff_chunk_get_right_end(const struct diff_chunk * c,const struct diff_result * r,int context_lines)77 diff_chunk_get_right_end(const struct diff_chunk *c,
78 const struct diff_result *r, int context_lines)
79 {
80 int right_start = diff_chunk_get_right_start(c, r, 0);
81 return MIN(r->right->atoms.len,
82 right_start + c->right_count + context_lines);
83 }
84
85 struct diff_chunk *
diff_chunk_get(const struct diff_result * r,int chunk_idx)86 diff_chunk_get(const struct diff_result *r, int chunk_idx)
87 {
88 return &r->chunks.head[chunk_idx];
89 }
90
91 int
diff_chunk_get_left_count(struct diff_chunk * c)92 diff_chunk_get_left_count(struct diff_chunk *c)
93 {
94 return c->left_count;
95 }
96
97 int
diff_chunk_get_right_count(struct diff_chunk * c)98 diff_chunk_get_right_count(struct diff_chunk *c)
99 {
100 return c->right_count;
101 }
102
103 void
diff_chunk_context_get(struct diff_chunk_context * cc,const struct diff_result * r,int chunk_idx,int context_lines)104 diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
105 int chunk_idx, int context_lines)
106 {
107 const struct diff_chunk *c = &r->chunks.head[chunk_idx];
108 int left_start = diff_chunk_get_left_start(c, r, context_lines);
109 int left_end = diff_chunk_get_left_end(c, r, context_lines);
110 int right_start = diff_chunk_get_right_start(c, r, context_lines);
111 int right_end = diff_chunk_get_right_end(c, r, context_lines);
112
113 *cc = (struct diff_chunk_context){
114 .chunk = {
115 .start = chunk_idx,
116 .end = chunk_idx + 1,
117 },
118 .left = {
119 .start = left_start,
120 .end = left_end,
121 },
122 .right = {
123 .start = right_start,
124 .end = right_end,
125 },
126 };
127 }
128
129 bool
diff_chunk_contexts_touch(const struct diff_chunk_context * cc,const struct diff_chunk_context * other)130 diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
131 const struct diff_chunk_context *other)
132 {
133 return diff_ranges_touch(&cc->chunk, &other->chunk)
134 || diff_ranges_touch(&cc->left, &other->left)
135 || diff_ranges_touch(&cc->right, &other->right);
136 }
137
138 void
diff_chunk_contexts_merge(struct diff_chunk_context * cc,const struct diff_chunk_context * other)139 diff_chunk_contexts_merge(struct diff_chunk_context *cc,
140 const struct diff_chunk_context *other)
141 {
142 diff_ranges_merge(&cc->chunk, &other->chunk);
143 diff_ranges_merge(&cc->left, &other->left);
144 diff_ranges_merge(&cc->right, &other->right);
145 }
146
147 void
diff_chunk_context_load_change(struct diff_chunk_context * cc,int * nchunks_used,struct diff_result * result,int start_chunk_idx,int context_lines)148 diff_chunk_context_load_change(struct diff_chunk_context *cc,
149 int *nchunks_used,
150 struct diff_result *result,
151 int start_chunk_idx,
152 int context_lines)
153 {
154 int i;
155 int seen_minus = 0, seen_plus = 0;
156
157 if (nchunks_used)
158 *nchunks_used = 0;
159
160 for (i = start_chunk_idx; i < result->chunks.len; i++) {
161 struct diff_chunk *chunk = &result->chunks.head[i];
162 enum diff_chunk_type t = diff_chunk_type(chunk);
163 struct diff_chunk_context next;
164
165 if (t != CHUNK_MINUS && t != CHUNK_PLUS) {
166 if (nchunks_used)
167 (*nchunks_used)++;
168 if (seen_minus || seen_plus)
169 break;
170 else
171 continue;
172 } else if (t == CHUNK_MINUS)
173 seen_minus = 1;
174 else if (t == CHUNK_PLUS)
175 seen_plus = 1;
176
177 if (diff_chunk_context_empty(cc)) {
178 /* Note down the start point, any number of subsequent
179 * chunks may be joined up to this chunk by being
180 * directly adjacent. */
181 diff_chunk_context_get(cc, result, i, context_lines);
182 if (nchunks_used)
183 (*nchunks_used)++;
184 continue;
185 }
186
187 /* There already is a previous chunk noted down for being
188 * printed. Does it join up with this one? */
189 diff_chunk_context_get(&next, result, i, context_lines);
190
191 if (diff_chunk_contexts_touch(cc, &next)) {
192 /* This next context touches or overlaps the previous
193 * one, join. */
194 diff_chunk_contexts_merge(cc, &next);
195 if (nchunks_used)
196 (*nchunks_used)++;
197 continue;
198 } else
199 break;
200 }
201 }
202
203 struct diff_output_unidiff_state {
204 bool header_printed;
205 char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
206 int last_prototype_idx;
207 };
208
209 struct diff_output_unidiff_state *
diff_output_unidiff_state_alloc(void)210 diff_output_unidiff_state_alloc(void)
211 {
212 struct diff_output_unidiff_state *state;
213
214 state = calloc(1, sizeof(struct diff_output_unidiff_state));
215 if (state != NULL)
216 diff_output_unidiff_state_reset(state);
217 return state;
218 }
219
220 void
diff_output_unidiff_state_reset(struct diff_output_unidiff_state * state)221 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
222 {
223 state->header_printed = false;
224 memset(state->prototype, 0, sizeof(state->prototype));
225 state->last_prototype_idx = 0;
226 }
227
228 void
diff_output_unidiff_state_free(struct diff_output_unidiff_state * state)229 diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
230 {
231 free(state);
232 }
233
234 static int
output_unidiff_chunk(struct diff_output_info * outinfo,FILE * dest,struct diff_output_unidiff_state * state,const struct diff_input_info * info,const struct diff_result * result,bool print_header,bool show_function_prototypes,const struct diff_chunk_context * cc)235 output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
236 struct diff_output_unidiff_state *state,
237 const struct diff_input_info *info,
238 const struct diff_result *result,
239 bool print_header, bool show_function_prototypes,
240 const struct diff_chunk_context *cc)
241 {
242 int rc, left_start, left_len, right_start, right_len;
243 off_t outoff = 0, *offp;
244 uint8_t *typep;
245
246 if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
247 return DIFF_RC_OK;
248
249 if (outinfo && outinfo->line_offsets.len > 0) {
250 unsigned int idx = outinfo->line_offsets.len - 1;
251 outoff = outinfo->line_offsets.head[idx];
252 }
253
254 if (print_header && !(state->header_printed)) {
255 rc = fprintf(dest, "--- %s\n",
256 diff_output_get_label_left(info));
257 if (rc < 0)
258 return errno;
259 if (outinfo) {
260 ARRAYLIST_ADD(offp, outinfo->line_offsets);
261 if (offp == NULL)
262 return ENOMEM;
263 outoff += rc;
264 *offp = outoff;
265 ARRAYLIST_ADD(typep, outinfo->line_types);
266 if (typep == NULL)
267 return ENOMEM;
268 *typep = DIFF_LINE_MINUS;
269 }
270 rc = fprintf(dest, "+++ %s\n",
271 diff_output_get_label_right(info));
272 if (rc < 0)
273 return errno;
274 if (outinfo) {
275 ARRAYLIST_ADD(offp, outinfo->line_offsets);
276 if (offp == NULL)
277 return ENOMEM;
278 outoff += rc;
279 *offp = outoff;
280 ARRAYLIST_ADD(typep, outinfo->line_types);
281 if (typep == NULL)
282 return ENOMEM;
283 *typep = DIFF_LINE_PLUS;
284 }
285 state->header_printed = true;
286 }
287
288 left_len = cc->left.end - cc->left.start;
289 if (result->left->atoms.len == 0)
290 left_start = 0;
291 else if (left_len == 0 && cc->left.start > 0)
292 left_start = cc->left.start;
293 else
294 left_start = cc->left.start + 1;
295
296 right_len = cc->right.end - cc->right.start;
297 if (result->right->atoms.len == 0)
298 right_start = 0;
299 else if (right_len == 0 && cc->right.start > 0)
300 right_start = cc->right.start;
301 else
302 right_start = cc->right.start + 1;
303
304 /* Got the absolute line numbers where to start printing, and the index
305 * of the interesting (non-context) chunk.
306 * To print context lines above the interesting chunk, nipping on the
307 * previous chunk index may be necessary.
308 * It is guaranteed to be only context lines where left == right, so it
309 * suffices to look on the left. */
310 const struct diff_chunk *first_chunk;
311 int chunk_start_line;
312 first_chunk = &result->chunks.head[cc->chunk.start];
313 chunk_start_line = diff_atom_root_idx(result->left,
314 first_chunk->left_start);
315 if (show_function_prototypes) {
316 rc = diff_output_match_function_prototype(state->prototype,
317 sizeof(state->prototype), &state->last_prototype_idx,
318 result, chunk_start_line);
319 if (rc)
320 return rc;
321 }
322
323 if (left_len == 1 && right_len == 1) {
324 rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
325 left_start, right_start,
326 state->prototype[0] ? " " : "",
327 state->prototype[0] ? state->prototype : "");
328 } else if (left_len == 1 && right_len != 1) {
329 rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
330 left_start, right_start, right_len,
331 state->prototype[0] ? " " : "",
332 state->prototype[0] ? state->prototype : "");
333 } else if (left_len != 1 && right_len == 1) {
334 rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
335 left_start, left_len, right_start,
336 state->prototype[0] ? " " : "",
337 state->prototype[0] ? state->prototype : "");
338 } else {
339 rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
340 left_start, left_len, right_start, right_len,
341 state->prototype[0] ? " " : "",
342 state->prototype[0] ? state->prototype : "");
343 }
344 if (rc < 0)
345 return errno;
346 if (outinfo) {
347 ARRAYLIST_ADD(offp, outinfo->line_offsets);
348 if (offp == NULL)
349 return ENOMEM;
350 outoff += rc;
351 *offp = outoff;
352 ARRAYLIST_ADD(typep, outinfo->line_types);
353 if (typep == NULL)
354 return ENOMEM;
355 *typep = DIFF_LINE_HUNK;
356 }
357
358 if (cc->left.start < chunk_start_line) {
359 rc = diff_output_lines(outinfo, dest, " ",
360 &result->left->atoms.head[cc->left.start],
361 chunk_start_line - cc->left.start);
362 if (rc)
363 return rc;
364 }
365
366 /* Now write out all the joined chunks and contexts between them */
367 int c_idx;
368 for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
369 const struct diff_chunk *c = &result->chunks.head[c_idx];
370
371 if (c->left_count && c->right_count)
372 rc = diff_output_lines(outinfo, dest,
373 c->solved ? " " : "?",
374 c->left_start, c->left_count);
375 else if (c->left_count && !c->right_count)
376 rc = diff_output_lines(outinfo, dest,
377 c->solved ? "-" : "?",
378 c->left_start, c->left_count);
379 else if (c->right_count && !c->left_count)
380 rc = diff_output_lines(outinfo, dest,
381 c->solved ? "+" : "?",
382 c->right_start, c->right_count);
383 if (rc)
384 return rc;
385
386 if (cc->chunk.end == result->chunks.len) {
387 rc = diff_output_trailing_newline_msg(outinfo, dest, c);
388 if (rc != DIFF_RC_OK)
389 return rc;
390 }
391 }
392
393 /* Trailing context? */
394 const struct diff_chunk *last_chunk;
395 int chunk_end_line;
396 last_chunk = &result->chunks.head[cc->chunk.end - 1];
397 chunk_end_line = diff_atom_root_idx(result->left,
398 last_chunk->left_start
399 + last_chunk->left_count);
400 if (cc->left.end > chunk_end_line) {
401 rc = diff_output_lines(outinfo, dest, " ",
402 &result->left->atoms.head[chunk_end_line],
403 cc->left.end - chunk_end_line);
404 if (rc)
405 return rc;
406
407 if (cc->left.end == result->left->atoms.len) {
408 rc = diff_output_trailing_newline_msg(outinfo, dest,
409 &result->chunks.head[result->chunks.len - 1]);
410 if (rc != DIFF_RC_OK)
411 return rc;
412 }
413 }
414
415 return DIFF_RC_OK;
416 }
417
418 int
diff_output_unidiff_chunk(struct diff_output_info ** output_info,FILE * dest,struct diff_output_unidiff_state * state,const struct diff_input_info * info,const struct diff_result * result,const struct diff_chunk_context * cc)419 diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
420 struct diff_output_unidiff_state *state,
421 const struct diff_input_info *info,
422 const struct diff_result *result,
423 const struct diff_chunk_context *cc)
424 {
425 struct diff_output_info *outinfo = NULL;
426 int flags = (result->left->root->diff_flags |
427 result->right->root->diff_flags);
428 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
429
430 if (output_info) {
431 *output_info = diff_output_info_alloc();
432 if (*output_info == NULL)
433 return ENOMEM;
434 outinfo = *output_info;
435 }
436
437 return output_unidiff_chunk(outinfo, dest, state, info,
438 result, false, show_function_prototypes, cc);
439 }
440
441 int
diff_output_unidiff(struct diff_output_info ** output_info,FILE * dest,const struct diff_input_info * info,const struct diff_result * result,unsigned int context_lines)442 diff_output_unidiff(struct diff_output_info **output_info,
443 FILE *dest, const struct diff_input_info *info,
444 const struct diff_result *result,
445 unsigned int context_lines)
446 {
447 struct diff_output_unidiff_state *state;
448 struct diff_chunk_context cc = {};
449 struct diff_output_info *outinfo = NULL;
450 int atomizer_flags = (result->left->atomizer_flags|
451 result->right->atomizer_flags);
452 int flags = (result->left->root->diff_flags |
453 result->right->root->diff_flags);
454 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
455 bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
456 bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
457 off_t outoff = 0, *offp;
458 uint8_t *typep;
459 int rc, i;
460
461 if (!result)
462 return EINVAL;
463 if (result->rc != DIFF_RC_OK)
464 return result->rc;
465
466 if (output_info) {
467 *output_info = diff_output_info_alloc();
468 if (*output_info == NULL)
469 return ENOMEM;
470 outinfo = *output_info;
471 }
472
473 if (have_binary && !force_text) {
474 for (i = 0; i < result->chunks.len; i++) {
475 struct diff_chunk *c = &result->chunks.head[i];
476 enum diff_chunk_type t = diff_chunk_type(c);
477
478 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
479 continue;
480
481 if (outinfo && outinfo->line_offsets.len > 0) {
482 unsigned int idx =
483 outinfo->line_offsets.len - 1;
484 outoff = outinfo->line_offsets.head[idx];
485 }
486
487 rc = fprintf(dest, "Binary files %s and %s differ\n",
488 diff_output_get_label_left(info),
489 diff_output_get_label_right(info));
490 if (outinfo) {
491 ARRAYLIST_ADD(offp, outinfo->line_offsets);
492 if (offp == NULL)
493 return ENOMEM;
494 outoff += rc;
495 *offp = outoff;
496 ARRAYLIST_ADD(typep, outinfo->line_types);
497 if (typep == NULL)
498 return ENOMEM;
499 *typep = DIFF_LINE_NONE;
500 }
501 break;
502 }
503
504 return DIFF_RC_OK;
505 }
506
507 state = diff_output_unidiff_state_alloc();
508 if (state == NULL) {
509 if (output_info) {
510 diff_output_info_free(*output_info);
511 *output_info = NULL;
512 }
513 return ENOMEM;
514 }
515
516 #if DEBUG
517 unsigned int check_left_pos, check_right_pos;
518 check_left_pos = 0;
519 check_right_pos = 0;
520 for (i = 0; i < result->chunks.len; i++) {
521 struct diff_chunk *c = &result->chunks.head[i];
522 enum diff_chunk_type t = diff_chunk_type(c);
523
524 debug("[%d] %s lines L%d R%d @L %d @R %d\n",
525 i, (t == CHUNK_MINUS ? "minus" :
526 (t == CHUNK_PLUS ? "plus" :
527 (t == CHUNK_SAME ? "same" : "?"))),
528 c->left_count,
529 c->right_count,
530 c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
531 c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
532 assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
533 assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
534 check_left_pos += c->left_count;
535 check_right_pos += c->right_count;
536
537 }
538 assert(check_left_pos == result->left->atoms.len);
539 assert(check_right_pos == result->right->atoms.len);
540 #endif
541
542 for (i = 0; i < result->chunks.len; i++) {
543 struct diff_chunk *c = &result->chunks.head[i];
544 enum diff_chunk_type t = diff_chunk_type(c);
545 struct diff_chunk_context next;
546
547 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
548 continue;
549
550 if (diff_chunk_context_empty(&cc)) {
551 /* These are the first lines being printed.
552 * Note down the start point, any number of subsequent
553 * chunks may be joined up to this unidiff chunk by
554 * context lines or by being directly adjacent. */
555 diff_chunk_context_get(&cc, result, i, context_lines);
556 debug("new chunk to be printed:"
557 " chunk %d-%d left %d-%d right %d-%d\n",
558 cc.chunk.start, cc.chunk.end,
559 cc.left.start, cc.left.end,
560 cc.right.start, cc.right.end);
561 continue;
562 }
563
564 /* There already is a previous chunk noted down for being
565 * printed. Does it join up with this one? */
566 diff_chunk_context_get(&next, result, i, context_lines);
567 debug("new chunk to be printed:"
568 " chunk %d-%d left %d-%d right %d-%d\n",
569 next.chunk.start, next.chunk.end,
570 next.left.start, next.left.end,
571 next.right.start, next.right.end);
572
573 if (diff_chunk_contexts_touch(&cc, &next)) {
574 /* This next context touches or overlaps the previous
575 * one, join. */
576 diff_chunk_contexts_merge(&cc, &next);
577 debug("new chunk to be printed touches previous chunk,"
578 " now: left %d-%d right %d-%d\n",
579 cc.left.start, cc.left.end,
580 cc.right.start, cc.right.end);
581 continue;
582 }
583
584 /* No touching, so the previous context is complete with a gap
585 * between it and this next one. Print the previous one and
586 * start fresh here. */
587 debug("new chunk to be printed does not touch previous chunk;"
588 " print left %d-%d right %d-%d\n",
589 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
590 output_unidiff_chunk(outinfo, dest, state, info, result,
591 true, show_function_prototypes, &cc);
592 cc = next;
593 debug("new unprinted chunk is left %d-%d right %d-%d\n",
594 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
595 }
596
597 if (!diff_chunk_context_empty(&cc))
598 output_unidiff_chunk(outinfo, dest, state, info, result,
599 true, show_function_prototypes, &cc);
600 diff_output_unidiff_state_free(state);
601 return DIFF_RC_OK;
602 }
603