xref: /freebsd/contrib/libdiff/lib/diff_internal.h (revision 5fbe8912d6a2d1178468d1994c301d4c7cd4f975)
159c8e88eSDag-Erling Smørgrav /* Generic infrastructure to implement various diff algorithms. */
259c8e88eSDag-Erling Smørgrav /*
359c8e88eSDag-Erling Smørgrav  * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
459c8e88eSDag-Erling Smørgrav  *
559c8e88eSDag-Erling Smørgrav  * Permission to use, copy, modify, and distribute this software for any
659c8e88eSDag-Erling Smørgrav  * purpose with or without fee is hereby granted, provided that the above
759c8e88eSDag-Erling Smørgrav  * copyright notice and this permission notice appear in all copies.
859c8e88eSDag-Erling Smørgrav  *
959c8e88eSDag-Erling Smørgrav  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1059c8e88eSDag-Erling Smørgrav  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1159c8e88eSDag-Erling Smørgrav  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1259c8e88eSDag-Erling Smørgrav  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1359c8e88eSDag-Erling Smørgrav  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1459c8e88eSDag-Erling Smørgrav  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1559c8e88eSDag-Erling Smørgrav  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1659c8e88eSDag-Erling Smørgrav  */
1759c8e88eSDag-Erling Smørgrav 
1859c8e88eSDag-Erling Smørgrav #ifndef MAX
1959c8e88eSDag-Erling Smørgrav #define MAX(A,B) ((A)>(B)?(A):(B))
2059c8e88eSDag-Erling Smørgrav #endif
2159c8e88eSDag-Erling Smørgrav #ifndef MIN
2259c8e88eSDag-Erling Smørgrav #define MIN(A,B) ((A)<(B)?(A):(B))
2359c8e88eSDag-Erling Smørgrav #endif
2459c8e88eSDag-Erling Smørgrav 
2559c8e88eSDag-Erling Smørgrav static inline bool
diff_range_empty(const struct diff_range * r)2659c8e88eSDag-Erling Smørgrav diff_range_empty(const struct diff_range *r)
2759c8e88eSDag-Erling Smørgrav {
2859c8e88eSDag-Erling Smørgrav 	return r->start == r->end;
2959c8e88eSDag-Erling Smørgrav }
3059c8e88eSDag-Erling Smørgrav 
3159c8e88eSDag-Erling Smørgrav static inline bool
diff_ranges_touch(const struct diff_range * a,const struct diff_range * b)3259c8e88eSDag-Erling Smørgrav diff_ranges_touch(const struct diff_range *a, const struct diff_range *b)
3359c8e88eSDag-Erling Smørgrav {
3459c8e88eSDag-Erling Smørgrav 	return (a->end >= b->start) && (a->start <= b->end);
3559c8e88eSDag-Erling Smørgrav }
3659c8e88eSDag-Erling Smørgrav 
3759c8e88eSDag-Erling Smørgrav static inline void
diff_ranges_merge(struct diff_range * a,const struct diff_range * b)3859c8e88eSDag-Erling Smørgrav diff_ranges_merge(struct diff_range *a, const struct diff_range *b)
3959c8e88eSDag-Erling Smørgrav {
4059c8e88eSDag-Erling Smørgrav 	*a = (struct diff_range){
4159c8e88eSDag-Erling Smørgrav 		.start = MIN(a->start, b->start),
4259c8e88eSDag-Erling Smørgrav 		.end = MAX(a->end, b->end),
4359c8e88eSDag-Erling Smørgrav 	};
4459c8e88eSDag-Erling Smørgrav }
4559c8e88eSDag-Erling Smørgrav 
4659c8e88eSDag-Erling Smørgrav static inline int
diff_range_len(const struct diff_range * r)4759c8e88eSDag-Erling Smørgrav diff_range_len(const struct diff_range *r)
4859c8e88eSDag-Erling Smørgrav {
4959c8e88eSDag-Erling Smørgrav 	if (!r)
5059c8e88eSDag-Erling Smørgrav 		return 0;
5159c8e88eSDag-Erling Smørgrav 	return r->end - r->start;
5259c8e88eSDag-Erling Smørgrav }
5359c8e88eSDag-Erling Smørgrav 
5459c8e88eSDag-Erling Smørgrav /* Indicate whether two given diff atoms match. */
5559c8e88eSDag-Erling Smørgrav int
5659c8e88eSDag-Erling Smørgrav diff_atom_same(bool *same,
5759c8e88eSDag-Erling Smørgrav 	       const struct diff_atom *left,
5859c8e88eSDag-Erling Smørgrav 	       const struct diff_atom *right);
5959c8e88eSDag-Erling Smørgrav 
6059c8e88eSDag-Erling Smørgrav /* A diff chunk represents a set of atoms on the left and/or a set of atoms on
6159c8e88eSDag-Erling Smørgrav  * the right.
6259c8e88eSDag-Erling Smørgrav  *
6359c8e88eSDag-Erling Smørgrav  * If solved == false:
6459c8e88eSDag-Erling Smørgrav  * The diff algorithm has divided the source file, and this is a chunk that the
6559c8e88eSDag-Erling Smørgrav  * inner_algo should run on next.
6659c8e88eSDag-Erling Smørgrav  * The lines on the left should be diffed against the lines on the right.
6759c8e88eSDag-Erling Smørgrav  * (If there are no left lines or no right lines, it implies solved == true,
6859c8e88eSDag-Erling Smørgrav  * because there is nothing to diff.)
6959c8e88eSDag-Erling Smørgrav  *
7059c8e88eSDag-Erling Smørgrav  * If solved == true:
7159c8e88eSDag-Erling Smørgrav  * If there are only left atoms, it is a chunk removing atoms from the left ("a
7259c8e88eSDag-Erling Smørgrav  * minus chunk").
7359c8e88eSDag-Erling Smørgrav  * If there are only right atoms, it is a chunk adding atoms from the right ("a
7459c8e88eSDag-Erling Smørgrav  * plus chunk").
7559c8e88eSDag-Erling Smørgrav  * If there are both left and right lines, it is a chunk of equal content on
7659c8e88eSDag-Erling Smørgrav  * both sides, and left_count == right_count:
7759c8e88eSDag-Erling Smørgrav  *
7859c8e88eSDag-Erling Smørgrav  * - foo  }
7959c8e88eSDag-Erling Smørgrav  * - bar  }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
8059c8e88eSDag-Erling Smørgrav  * - baz  }               right_start = NULL, right_count = 0 }
8159c8e88eSDag-Erling Smørgrav  *   moo    }
8259c8e88eSDag-Erling Smørgrav  *   goo    }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
8359c8e88eSDag-Erling Smørgrav  *   zoo    }              right_start = &right.atoms.head[0], right_count = 3 }
8459c8e88eSDag-Erling Smørgrav  *  +loo      }
8559c8e88eSDag-Erling Smørgrav  *  +roo      }-- diff_chunk{ left_start = NULL, left_count = 0,
8659c8e88eSDag-Erling Smørgrav  *  +too      }            right_start = &right.atoms.head[3], right_count = 3 }
8759c8e88eSDag-Erling Smørgrav  *
8859c8e88eSDag-Erling Smørgrav  */
8959c8e88eSDag-Erling Smørgrav struct diff_chunk {
9059c8e88eSDag-Erling Smørgrav 	bool solved;
9159c8e88eSDag-Erling Smørgrav 	struct diff_atom *left_start;
9259c8e88eSDag-Erling Smørgrav 	unsigned int left_count;
9359c8e88eSDag-Erling Smørgrav 	struct diff_atom *right_start;
9459c8e88eSDag-Erling Smørgrav 	unsigned int right_count;
9559c8e88eSDag-Erling Smørgrav };
9659c8e88eSDag-Erling Smørgrav 
9759c8e88eSDag-Erling Smørgrav #define DIFF_RESULT_ALLOC_BLOCKSIZE 128
9859c8e88eSDag-Erling Smørgrav 
9959c8e88eSDag-Erling Smørgrav struct diff_chunk_context;
10059c8e88eSDag-Erling Smørgrav 
10159c8e88eSDag-Erling Smørgrav bool
10259c8e88eSDag-Erling Smørgrav diff_chunk_context_empty(const struct diff_chunk_context *cc);
10359c8e88eSDag-Erling Smørgrav 
10459c8e88eSDag-Erling Smørgrav bool
10559c8e88eSDag-Erling Smørgrav diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
10659c8e88eSDag-Erling Smørgrav 			  const struct diff_chunk_context *other);
10759c8e88eSDag-Erling Smørgrav 
10859c8e88eSDag-Erling Smørgrav void
10959c8e88eSDag-Erling Smørgrav diff_chunk_contexts_merge(struct diff_chunk_context *cc,
11059c8e88eSDag-Erling Smørgrav 			  const struct diff_chunk_context *other);
11159c8e88eSDag-Erling Smørgrav 
11259c8e88eSDag-Erling Smørgrav struct diff_state {
11359c8e88eSDag-Erling Smørgrav 	/* The final result passed to the original diff caller. */
11459c8e88eSDag-Erling Smørgrav 	struct diff_result *result;
11559c8e88eSDag-Erling Smørgrav 
11659c8e88eSDag-Erling Smørgrav 	/* The root diff_data is in result->left,right, these are (possibly)
11759c8e88eSDag-Erling Smørgrav 	 * subsections of the root data. */
11859c8e88eSDag-Erling Smørgrav 	struct diff_data left;
11959c8e88eSDag-Erling Smørgrav 	struct diff_data right;
12059c8e88eSDag-Erling Smørgrav 
12159c8e88eSDag-Erling Smørgrav 	unsigned int recursion_depth_left;
12259c8e88eSDag-Erling Smørgrav 
12359c8e88eSDag-Erling Smørgrav 	/* Remaining chunks from one diff algorithm pass, if any solved == false
12459c8e88eSDag-Erling Smørgrav 	 * chunks came up. */
12559c8e88eSDag-Erling Smørgrav 	diff_chunk_arraylist_t temp_result;
12659c8e88eSDag-Erling Smørgrav 
12759c8e88eSDag-Erling Smørgrav  	/* State buffer used by Myers algorithm. */
12859c8e88eSDag-Erling Smørgrav 	int *kd_buf;
12959c8e88eSDag-Erling Smørgrav 	size_t kd_buf_size; /* in units of sizeof(int), not bytes */
13059c8e88eSDag-Erling Smørgrav };
13159c8e88eSDag-Erling Smørgrav 
13259c8e88eSDag-Erling Smørgrav struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
13359c8e88eSDag-Erling Smørgrav 					struct diff_atom *left_start,
13459c8e88eSDag-Erling Smørgrav 					unsigned int left_count,
13559c8e88eSDag-Erling Smørgrav 					struct diff_atom *right_start,
13659c8e88eSDag-Erling Smørgrav 					unsigned int right_count);
13759c8e88eSDag-Erling Smørgrav 
13859c8e88eSDag-Erling Smørgrav struct diff_output_info;
13959c8e88eSDag-Erling Smørgrav 
14059c8e88eSDag-Erling Smørgrav int diff_output_lines(struct diff_output_info *output_info, FILE *dest,
14159c8e88eSDag-Erling Smørgrav 		       const char *prefix, struct diff_atom *start_atom,
14259c8e88eSDag-Erling Smørgrav 		       unsigned int count);
14359c8e88eSDag-Erling Smørgrav 
14459c8e88eSDag-Erling Smørgrav int diff_output_trailing_newline_msg(struct diff_output_info *outinfo,
14559c8e88eSDag-Erling Smørgrav 				     FILE *dest,
14659c8e88eSDag-Erling Smørgrav 				     const struct diff_chunk *c);
14759c8e88eSDag-Erling Smørgrav #define DIFF_FUNCTION_CONTEXT_SIZE	55
14859c8e88eSDag-Erling Smørgrav int diff_output_match_function_prototype(char *prototype, size_t prototype_size,
14959c8e88eSDag-Erling Smørgrav 					 int *last_prototype_idx,
15059c8e88eSDag-Erling Smørgrav 					 const struct diff_result *result,
151*5fbe8912SDag-Erling Smørgrav 					 int chunk_start_line);
15259c8e88eSDag-Erling Smørgrav 
15359c8e88eSDag-Erling Smørgrav struct diff_output_info *diff_output_info_alloc(void);
15459c8e88eSDag-Erling Smørgrav 
15559c8e88eSDag-Erling Smørgrav void
15659c8e88eSDag-Erling Smørgrav diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
15759c8e88eSDag-Erling Smørgrav 			  struct diff_atom *from_atom, unsigned int atoms_count);
158