1 /* Produce a unidiff output from a diff_result. */ 2 /* 3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <errno.h> 19 #include <stdbool.h> 20 #include <stdint.h> 21 #include <stdio.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include <assert.h> 25 26 #include <arraylist.h> 27 #include <diff_main.h> 28 #include <diff_output.h> 29 30 #include "diff_internal.h" 31 #include "diff_debug.h" 32 33 off_t 34 diff_chunk_get_left_start_pos(const struct diff_chunk *c) 35 { 36 return c->left_start->pos; 37 } 38 39 off_t 40 diff_chunk_get_right_start_pos(const struct diff_chunk *c) 41 { 42 return c->right_start->pos; 43 } 44 45 bool 46 diff_chunk_context_empty(const struct diff_chunk_context *cc) 47 { 48 return diff_range_empty(&cc->chunk); 49 } 50 51 int 52 diff_chunk_get_left_start(const struct diff_chunk *c, 53 const struct diff_result *r, int context_lines) 54 { 55 int left_start = diff_atom_root_idx(r->left, c->left_start); 56 return MAX(0, left_start - context_lines); 57 } 58 59 int 60 diff_chunk_get_left_end(const struct diff_chunk *c, 61 const struct diff_result *r, int context_lines) 62 { 63 int left_start = diff_chunk_get_left_start(c, r, 0); 64 return MIN(r->left->atoms.len, 65 left_start + c->left_count + context_lines); 66 } 67 68 int 69 diff_chunk_get_right_start(const struct diff_chunk *c, 70 const struct diff_result *r, int context_lines) 71 { 72 int right_start = diff_atom_root_idx(r->right, c->right_start); 73 return MAX(0, right_start - context_lines); 74 } 75 76 int 77 diff_chunk_get_right_end(const struct diff_chunk *c, 78 const struct diff_result *r, int context_lines) 79 { 80 int right_start = diff_chunk_get_right_start(c, r, 0); 81 return MIN(r->right->atoms.len, 82 right_start + c->right_count + context_lines); 83 } 84 85 struct diff_chunk * 86 diff_chunk_get(const struct diff_result *r, int chunk_idx) 87 { 88 return &r->chunks.head[chunk_idx]; 89 } 90 91 int 92 diff_chunk_get_left_count(struct diff_chunk *c) 93 { 94 return c->left_count; 95 } 96 97 int 98 diff_chunk_get_right_count(struct diff_chunk *c) 99 { 100 return c->right_count; 101 } 102 103 void 104 diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r, 105 int chunk_idx, int context_lines) 106 { 107 const struct diff_chunk *c = &r->chunks.head[chunk_idx]; 108 int left_start = diff_chunk_get_left_start(c, r, context_lines); 109 int left_end = diff_chunk_get_left_end(c, r, context_lines); 110 int right_start = diff_chunk_get_right_start(c, r, context_lines); 111 int right_end = diff_chunk_get_right_end(c, r, context_lines); 112 113 *cc = (struct diff_chunk_context){ 114 .chunk = { 115 .start = chunk_idx, 116 .end = chunk_idx + 1, 117 }, 118 .left = { 119 .start = left_start, 120 .end = left_end, 121 }, 122 .right = { 123 .start = right_start, 124 .end = right_end, 125 }, 126 }; 127 } 128 129 bool 130 diff_chunk_contexts_touch(const struct diff_chunk_context *cc, 131 const struct diff_chunk_context *other) 132 { 133 return diff_ranges_touch(&cc->chunk, &other->chunk) 134 || diff_ranges_touch(&cc->left, &other->left) 135 || diff_ranges_touch(&cc->right, &other->right); 136 } 137 138 void 139 diff_chunk_contexts_merge(struct diff_chunk_context *cc, 140 const struct diff_chunk_context *other) 141 { 142 diff_ranges_merge(&cc->chunk, &other->chunk); 143 diff_ranges_merge(&cc->left, &other->left); 144 diff_ranges_merge(&cc->right, &other->right); 145 } 146 147 void 148 diff_chunk_context_load_change(struct diff_chunk_context *cc, 149 int *nchunks_used, 150 struct diff_result *result, 151 int start_chunk_idx, 152 int context_lines) 153 { 154 int i; 155 int seen_minus = 0, seen_plus = 0; 156 157 if (nchunks_used) 158 *nchunks_used = 0; 159 160 for (i = start_chunk_idx; i < result->chunks.len; i++) { 161 struct diff_chunk *chunk = &result->chunks.head[i]; 162 enum diff_chunk_type t = diff_chunk_type(chunk); 163 struct diff_chunk_context next; 164 165 if (t != CHUNK_MINUS && t != CHUNK_PLUS) { 166 if (nchunks_used) 167 (*nchunks_used)++; 168 if (seen_minus || seen_plus) 169 break; 170 else 171 continue; 172 } else if (t == CHUNK_MINUS) 173 seen_minus = 1; 174 else if (t == CHUNK_PLUS) 175 seen_plus = 1; 176 177 if (diff_chunk_context_empty(cc)) { 178 /* Note down the start point, any number of subsequent 179 * chunks may be joined up to this chunk by being 180 * directly adjacent. */ 181 diff_chunk_context_get(cc, result, i, context_lines); 182 if (nchunks_used) 183 (*nchunks_used)++; 184 continue; 185 } 186 187 /* There already is a previous chunk noted down for being 188 * printed. Does it join up with this one? */ 189 diff_chunk_context_get(&next, result, i, context_lines); 190 191 if (diff_chunk_contexts_touch(cc, &next)) { 192 /* This next context touches or overlaps the previous 193 * one, join. */ 194 diff_chunk_contexts_merge(cc, &next); 195 if (nchunks_used) 196 (*nchunks_used)++; 197 continue; 198 } else 199 break; 200 } 201 } 202 203 struct diff_output_unidiff_state { 204 bool header_printed; 205 char prototype[DIFF_FUNCTION_CONTEXT_SIZE]; 206 int last_prototype_idx; 207 }; 208 209 struct diff_output_unidiff_state * 210 diff_output_unidiff_state_alloc(void) 211 { 212 struct diff_output_unidiff_state *state; 213 214 state = calloc(1, sizeof(struct diff_output_unidiff_state)); 215 if (state != NULL) 216 diff_output_unidiff_state_reset(state); 217 return state; 218 } 219 220 void 221 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state) 222 { 223 state->header_printed = false; 224 memset(state->prototype, 0, sizeof(state->prototype)); 225 state->last_prototype_idx = 0; 226 } 227 228 void 229 diff_output_unidiff_state_free(struct diff_output_unidiff_state *state) 230 { 231 free(state); 232 } 233 234 static int 235 output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest, 236 struct diff_output_unidiff_state *state, 237 const struct diff_input_info *info, 238 const struct diff_result *result, 239 bool print_header, bool show_function_prototypes, 240 const struct diff_chunk_context *cc) 241 { 242 int rc, left_start, left_len, right_start, right_len; 243 off_t outoff = 0, *offp; 244 uint8_t *typep; 245 246 if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right)) 247 return DIFF_RC_OK; 248 249 if (outinfo && outinfo->line_offsets.len > 0) { 250 unsigned int idx = outinfo->line_offsets.len - 1; 251 outoff = outinfo->line_offsets.head[idx]; 252 } 253 254 if (print_header && !(state->header_printed)) { 255 rc = fprintf(dest, "--- %s\n", 256 diff_output_get_label_left(info)); 257 if (rc < 0) 258 return errno; 259 if (outinfo) { 260 ARRAYLIST_ADD(offp, outinfo->line_offsets); 261 if (offp == NULL) 262 return ENOMEM; 263 outoff += rc; 264 *offp = outoff; 265 ARRAYLIST_ADD(typep, outinfo->line_types); 266 if (typep == NULL) 267 return ENOMEM; 268 *typep = DIFF_LINE_MINUS; 269 } 270 rc = fprintf(dest, "+++ %s\n", 271 diff_output_get_label_right(info)); 272 if (rc < 0) 273 return errno; 274 if (outinfo) { 275 ARRAYLIST_ADD(offp, outinfo->line_offsets); 276 if (offp == NULL) 277 return ENOMEM; 278 outoff += rc; 279 *offp = outoff; 280 ARRAYLIST_ADD(typep, outinfo->line_types); 281 if (typep == NULL) 282 return ENOMEM; 283 *typep = DIFF_LINE_PLUS; 284 } 285 state->header_printed = true; 286 } 287 288 left_len = cc->left.end - cc->left.start; 289 if (result->left->atoms.len == 0) 290 left_start = 0; 291 else if (left_len == 0 && cc->left.start > 0) 292 left_start = cc->left.start; 293 else 294 left_start = cc->left.start + 1; 295 296 right_len = cc->right.end - cc->right.start; 297 if (result->right->atoms.len == 0) 298 right_start = 0; 299 else if (right_len == 0 && cc->right.start > 0) 300 right_start = cc->right.start; 301 else 302 right_start = cc->right.start + 1; 303 304 if (show_function_prototypes) { 305 rc = diff_output_match_function_prototype(state->prototype, 306 sizeof(state->prototype), &state->last_prototype_idx, 307 result, cc); 308 if (rc) 309 return rc; 310 } 311 312 if (left_len == 1 && right_len == 1) { 313 rc = fprintf(dest, "@@ -%d +%d @@%s%s\n", 314 left_start, right_start, 315 state->prototype[0] ? " " : "", 316 state->prototype[0] ? state->prototype : ""); 317 } else if (left_len == 1 && right_len != 1) { 318 rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n", 319 left_start, right_start, right_len, 320 state->prototype[0] ? " " : "", 321 state->prototype[0] ? state->prototype : ""); 322 } else if (left_len != 1 && right_len == 1) { 323 rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n", 324 left_start, left_len, right_start, 325 state->prototype[0] ? " " : "", 326 state->prototype[0] ? state->prototype : ""); 327 } else { 328 rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n", 329 left_start, left_len, right_start, right_len, 330 state->prototype[0] ? " " : "", 331 state->prototype[0] ? state->prototype : ""); 332 } 333 if (rc < 0) 334 return errno; 335 if (outinfo) { 336 ARRAYLIST_ADD(offp, outinfo->line_offsets); 337 if (offp == NULL) 338 return ENOMEM; 339 outoff += rc; 340 *offp = outoff; 341 ARRAYLIST_ADD(typep, outinfo->line_types); 342 if (typep == NULL) 343 return ENOMEM; 344 *typep = DIFF_LINE_HUNK; 345 } 346 347 /* Got the absolute line numbers where to start printing, and the index 348 * of the interesting (non-context) chunk. 349 * To print context lines above the interesting chunk, nipping on the 350 * previous chunk index may be necessary. 351 * It is guaranteed to be only context lines where left == right, so it 352 * suffices to look on the left. */ 353 const struct diff_chunk *first_chunk; 354 int chunk_start_line; 355 first_chunk = &result->chunks.head[cc->chunk.start]; 356 chunk_start_line = diff_atom_root_idx(result->left, 357 first_chunk->left_start); 358 if (cc->left.start < chunk_start_line) { 359 rc = diff_output_lines(outinfo, dest, " ", 360 &result->left->atoms.head[cc->left.start], 361 chunk_start_line - cc->left.start); 362 if (rc) 363 return rc; 364 } 365 366 /* Now write out all the joined chunks and contexts between them */ 367 int c_idx; 368 for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) { 369 const struct diff_chunk *c = &result->chunks.head[c_idx]; 370 371 if (c->left_count && c->right_count) 372 rc = diff_output_lines(outinfo, dest, 373 c->solved ? " " : "?", 374 c->left_start, c->left_count); 375 else if (c->left_count && !c->right_count) 376 rc = diff_output_lines(outinfo, dest, 377 c->solved ? "-" : "?", 378 c->left_start, c->left_count); 379 else if (c->right_count && !c->left_count) 380 rc = diff_output_lines(outinfo, dest, 381 c->solved ? "+" : "?", 382 c->right_start, c->right_count); 383 if (rc) 384 return rc; 385 386 if (cc->chunk.end == result->chunks.len) { 387 rc = diff_output_trailing_newline_msg(outinfo, dest, c); 388 if (rc != DIFF_RC_OK) 389 return rc; 390 } 391 } 392 393 /* Trailing context? */ 394 const struct diff_chunk *last_chunk; 395 int chunk_end_line; 396 last_chunk = &result->chunks.head[cc->chunk.end - 1]; 397 chunk_end_line = diff_atom_root_idx(result->left, 398 last_chunk->left_start 399 + last_chunk->left_count); 400 if (cc->left.end > chunk_end_line) { 401 rc = diff_output_lines(outinfo, dest, " ", 402 &result->left->atoms.head[chunk_end_line], 403 cc->left.end - chunk_end_line); 404 if (rc) 405 return rc; 406 407 if (cc->left.end == result->left->atoms.len) { 408 rc = diff_output_trailing_newline_msg(outinfo, dest, 409 &result->chunks.head[result->chunks.len - 1]); 410 if (rc != DIFF_RC_OK) 411 return rc; 412 } 413 } 414 415 return DIFF_RC_OK; 416 } 417 418 int 419 diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest, 420 struct diff_output_unidiff_state *state, 421 const struct diff_input_info *info, 422 const struct diff_result *result, 423 const struct diff_chunk_context *cc) 424 { 425 struct diff_output_info *outinfo = NULL; 426 int flags = (result->left->root->diff_flags | 427 result->right->root->diff_flags); 428 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES); 429 430 if (output_info) { 431 *output_info = diff_output_info_alloc(); 432 if (*output_info == NULL) 433 return ENOMEM; 434 outinfo = *output_info; 435 } 436 437 return output_unidiff_chunk(outinfo, dest, state, info, 438 result, false, show_function_prototypes, cc); 439 } 440 441 int 442 diff_output_unidiff(struct diff_output_info **output_info, 443 FILE *dest, const struct diff_input_info *info, 444 const struct diff_result *result, 445 unsigned int context_lines) 446 { 447 struct diff_output_unidiff_state *state; 448 struct diff_chunk_context cc = {}; 449 struct diff_output_info *outinfo = NULL; 450 int atomizer_flags = (result->left->atomizer_flags| 451 result->right->atomizer_flags); 452 int flags = (result->left->root->diff_flags | 453 result->right->root->diff_flags); 454 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES); 455 bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA); 456 bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA); 457 off_t outoff = 0, *offp; 458 uint8_t *typep; 459 int rc, i; 460 461 if (!result) 462 return EINVAL; 463 if (result->rc != DIFF_RC_OK) 464 return result->rc; 465 466 if (output_info) { 467 *output_info = diff_output_info_alloc(); 468 if (*output_info == NULL) 469 return ENOMEM; 470 outinfo = *output_info; 471 } 472 473 if (have_binary && !force_text) { 474 for (i = 0; i < result->chunks.len; i++) { 475 struct diff_chunk *c = &result->chunks.head[i]; 476 enum diff_chunk_type t = diff_chunk_type(c); 477 478 if (t != CHUNK_MINUS && t != CHUNK_PLUS) 479 continue; 480 481 if (outinfo && outinfo->line_offsets.len > 0) { 482 unsigned int idx = 483 outinfo->line_offsets.len - 1; 484 outoff = outinfo->line_offsets.head[idx]; 485 } 486 487 rc = fprintf(dest, "Binary files %s and %s differ\n", 488 diff_output_get_label_left(info), 489 diff_output_get_label_right(info)); 490 if (outinfo) { 491 ARRAYLIST_ADD(offp, outinfo->line_offsets); 492 if (offp == NULL) 493 return ENOMEM; 494 outoff += rc; 495 *offp = outoff; 496 ARRAYLIST_ADD(typep, outinfo->line_types); 497 if (typep == NULL) 498 return ENOMEM; 499 *typep = DIFF_LINE_NONE; 500 } 501 break; 502 } 503 504 return DIFF_RC_OK; 505 } 506 507 state = diff_output_unidiff_state_alloc(); 508 if (state == NULL) { 509 if (output_info) { 510 diff_output_info_free(*output_info); 511 *output_info = NULL; 512 } 513 return ENOMEM; 514 } 515 516 #if DEBUG 517 unsigned int check_left_pos, check_right_pos; 518 check_left_pos = 0; 519 check_right_pos = 0; 520 for (i = 0; i < result->chunks.len; i++) { 521 struct diff_chunk *c = &result->chunks.head[i]; 522 enum diff_chunk_type t = diff_chunk_type(c); 523 524 debug("[%d] %s lines L%d R%d @L %d @R %d\n", 525 i, (t == CHUNK_MINUS ? "minus" : 526 (t == CHUNK_PLUS ? "plus" : 527 (t == CHUNK_SAME ? "same" : "?"))), 528 c->left_count, 529 c->right_count, 530 c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1, 531 c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1); 532 assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start)); 533 assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start)); 534 check_left_pos += c->left_count; 535 check_right_pos += c->right_count; 536 537 } 538 assert(check_left_pos == result->left->atoms.len); 539 assert(check_right_pos == result->right->atoms.len); 540 #endif 541 542 for (i = 0; i < result->chunks.len; i++) { 543 struct diff_chunk *c = &result->chunks.head[i]; 544 enum diff_chunk_type t = diff_chunk_type(c); 545 struct diff_chunk_context next; 546 547 if (t != CHUNK_MINUS && t != CHUNK_PLUS) 548 continue; 549 550 if (diff_chunk_context_empty(&cc)) { 551 /* These are the first lines being printed. 552 * Note down the start point, any number of subsequent 553 * chunks may be joined up to this unidiff chunk by 554 * context lines or by being directly adjacent. */ 555 diff_chunk_context_get(&cc, result, i, context_lines); 556 debug("new chunk to be printed:" 557 " chunk %d-%d left %d-%d right %d-%d\n", 558 cc.chunk.start, cc.chunk.end, 559 cc.left.start, cc.left.end, 560 cc.right.start, cc.right.end); 561 continue; 562 } 563 564 /* There already is a previous chunk noted down for being 565 * printed. Does it join up with this one? */ 566 diff_chunk_context_get(&next, result, i, context_lines); 567 debug("new chunk to be printed:" 568 " chunk %d-%d left %d-%d right %d-%d\n", 569 next.chunk.start, next.chunk.end, 570 next.left.start, next.left.end, 571 next.right.start, next.right.end); 572 573 if (diff_chunk_contexts_touch(&cc, &next)) { 574 /* This next context touches or overlaps the previous 575 * one, join. */ 576 diff_chunk_contexts_merge(&cc, &next); 577 debug("new chunk to be printed touches previous chunk," 578 " now: left %d-%d right %d-%d\n", 579 cc.left.start, cc.left.end, 580 cc.right.start, cc.right.end); 581 continue; 582 } 583 584 /* No touching, so the previous context is complete with a gap 585 * between it and this next one. Print the previous one and 586 * start fresh here. */ 587 debug("new chunk to be printed does not touch previous chunk;" 588 " print left %d-%d right %d-%d\n", 589 cc.left.start, cc.left.end, cc.right.start, cc.right.end); 590 output_unidiff_chunk(outinfo, dest, state, info, result, 591 true, show_function_prototypes, &cc); 592 cc = next; 593 debug("new unprinted chunk is left %d-%d right %d-%d\n", 594 cc.left.start, cc.left.end, cc.right.start, cc.right.end); 595 } 596 597 if (!diff_chunk_context_empty(&cc)) 598 output_unidiff_chunk(outinfo, dest, state, info, result, 599 true, show_function_prototypes, &cc); 600 diff_output_unidiff_state_free(state); 601 return DIFF_RC_OK; 602 } 603