xref: /freebsd/contrib/xz/src/liblzma/common/index_encoder.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_encoder.c
/// \brief      Encodes the Index field
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska 
1281ad8388SMartin Matuska #include "index_encoder.h"
1381ad8388SMartin Matuska #include "index.h"
1481ad8388SMartin Matuska #include "check.h"
1581ad8388SMartin Matuska 
1681ad8388SMartin Matuska 
171456f0f9SXin LI typedef struct {
1881ad8388SMartin Matuska 	enum {
1981ad8388SMartin Matuska 		SEQ_INDICATOR,
2081ad8388SMartin Matuska 		SEQ_COUNT,
2181ad8388SMartin Matuska 		SEQ_UNPADDED,
2281ad8388SMartin Matuska 		SEQ_UNCOMPRESSED,
2381ad8388SMartin Matuska 		SEQ_NEXT,
2481ad8388SMartin Matuska 		SEQ_PADDING,
2581ad8388SMartin Matuska 		SEQ_CRC32,
2681ad8388SMartin Matuska 	} sequence;
2781ad8388SMartin Matuska 
2881ad8388SMartin Matuska 	/// Index being encoded
2981ad8388SMartin Matuska 	const lzma_index *index;
3081ad8388SMartin Matuska 
3181ad8388SMartin Matuska 	/// Iterator for the Index being encoded
3281ad8388SMartin Matuska 	lzma_index_iter iter;
3381ad8388SMartin Matuska 
3481ad8388SMartin Matuska 	/// Position in integers
3581ad8388SMartin Matuska 	size_t pos;
3681ad8388SMartin Matuska 
3781ad8388SMartin Matuska 	/// CRC32 of the List of Records field
3881ad8388SMartin Matuska 	uint32_t crc32;
391456f0f9SXin LI } lzma_index_coder;
4081ad8388SMartin Matuska 
4181ad8388SMartin Matuska 
4281ad8388SMartin Matuska static lzma_ret
index_encode(void * coder_ptr,const lzma_allocator * allocator lzma_attribute ((__unused__)),const uint8_t * restrict in lzma_attribute ((__unused__)),size_t * restrict in_pos lzma_attribute ((__unused__)),size_t in_size lzma_attribute ((__unused__)),uint8_t * restrict out,size_t * restrict out_pos,size_t out_size,lzma_action action lzma_attribute ((__unused__)))431456f0f9SXin LI index_encode(void *coder_ptr,
4453200025SRui Paulo 		const lzma_allocator *allocator lzma_attribute((__unused__)),
45e24134bcSMartin Matuska 		const uint8_t *restrict in lzma_attribute((__unused__)),
46e24134bcSMartin Matuska 		size_t *restrict in_pos lzma_attribute((__unused__)),
47e24134bcSMartin Matuska 		size_t in_size lzma_attribute((__unused__)),
4881ad8388SMartin Matuska 		uint8_t *restrict out, size_t *restrict out_pos,
49e24134bcSMartin Matuska 		size_t out_size,
50e24134bcSMartin Matuska 		lzma_action action lzma_attribute((__unused__)))
5181ad8388SMartin Matuska {
521456f0f9SXin LI 	lzma_index_coder *coder = coder_ptr;
531456f0f9SXin LI 
5481ad8388SMartin Matuska 	// Position where to start calculating CRC32. The idea is that we
5581ad8388SMartin Matuska 	// need to call lzma_crc32() only once per call to index_encode().
5681ad8388SMartin Matuska 	const size_t out_start = *out_pos;
5781ad8388SMartin Matuska 
5881ad8388SMartin Matuska 	// Return value to use if we return at the end of this function.
5981ad8388SMartin Matuska 	// We use "goto out" to jump out of the while-switch construct
6081ad8388SMartin Matuska 	// instead of returning directly, because that way we don't need
6181ad8388SMartin Matuska 	// to copypaste the lzma_crc32() call to many places.
6281ad8388SMartin Matuska 	lzma_ret ret = LZMA_OK;
6381ad8388SMartin Matuska 
6481ad8388SMartin Matuska 	while (*out_pos < out_size)
6581ad8388SMartin Matuska 	switch (coder->sequence) {
6681ad8388SMartin Matuska 	case SEQ_INDICATOR:
67047153b4SXin LI 		out[*out_pos] = INDEX_INDICATOR;
6881ad8388SMartin Matuska 		++*out_pos;
6981ad8388SMartin Matuska 		coder->sequence = SEQ_COUNT;
7081ad8388SMartin Matuska 		break;
7181ad8388SMartin Matuska 
7281ad8388SMartin Matuska 	case SEQ_COUNT: {
7381ad8388SMartin Matuska 		const lzma_vli count = lzma_index_block_count(coder->index);
7481ad8388SMartin Matuska 		ret = lzma_vli_encode(count, &coder->pos,
7581ad8388SMartin Matuska 				out, out_pos, out_size);
7681ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
7781ad8388SMartin Matuska 			goto out;
7881ad8388SMartin Matuska 
7981ad8388SMartin Matuska 		ret = LZMA_OK;
8081ad8388SMartin Matuska 		coder->pos = 0;
8181ad8388SMartin Matuska 		coder->sequence = SEQ_NEXT;
8281ad8388SMartin Matuska 		break;
8381ad8388SMartin Matuska 	}
8481ad8388SMartin Matuska 
8581ad8388SMartin Matuska 	case SEQ_NEXT:
8681ad8388SMartin Matuska 		if (lzma_index_iter_next(
8781ad8388SMartin Matuska 				&coder->iter, LZMA_INDEX_ITER_BLOCK)) {
8881ad8388SMartin Matuska 			// Get the size of the Index Padding field.
8981ad8388SMartin Matuska 			coder->pos = lzma_index_padding_size(coder->index);
9081ad8388SMartin Matuska 			assert(coder->pos <= 3);
9181ad8388SMartin Matuska 			coder->sequence = SEQ_PADDING;
9281ad8388SMartin Matuska 			break;
9381ad8388SMartin Matuska 		}
9481ad8388SMartin Matuska 
9581ad8388SMartin Matuska 		coder->sequence = SEQ_UNPADDED;
96*128836d3SXin LI 		FALLTHROUGH;
9781ad8388SMartin Matuska 
9881ad8388SMartin Matuska 	case SEQ_UNPADDED:
9981ad8388SMartin Matuska 	case SEQ_UNCOMPRESSED: {
10081ad8388SMartin Matuska 		const lzma_vli size = coder->sequence == SEQ_UNPADDED
10181ad8388SMartin Matuska 				? coder->iter.block.unpadded_size
10281ad8388SMartin Matuska 				: coder->iter.block.uncompressed_size;
10381ad8388SMartin Matuska 
10481ad8388SMartin Matuska 		ret = lzma_vli_encode(size, &coder->pos,
10581ad8388SMartin Matuska 				out, out_pos, out_size);
10681ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
10781ad8388SMartin Matuska 			goto out;
10881ad8388SMartin Matuska 
10981ad8388SMartin Matuska 		ret = LZMA_OK;
11081ad8388SMartin Matuska 		coder->pos = 0;
11181ad8388SMartin Matuska 
11281ad8388SMartin Matuska 		// Advance to SEQ_UNCOMPRESSED or SEQ_NEXT.
11381ad8388SMartin Matuska 		++coder->sequence;
11481ad8388SMartin Matuska 		break;
11581ad8388SMartin Matuska 	}
11681ad8388SMartin Matuska 
11781ad8388SMartin Matuska 	case SEQ_PADDING:
11881ad8388SMartin Matuska 		if (coder->pos > 0) {
11981ad8388SMartin Matuska 			--coder->pos;
12081ad8388SMartin Matuska 			out[(*out_pos)++] = 0x00;
12181ad8388SMartin Matuska 			break;
12281ad8388SMartin Matuska 		}
12381ad8388SMartin Matuska 
12481ad8388SMartin Matuska 		// Finish the CRC32 calculation.
12581ad8388SMartin Matuska 		coder->crc32 = lzma_crc32(out + out_start,
12681ad8388SMartin Matuska 				*out_pos - out_start, coder->crc32);
12781ad8388SMartin Matuska 
12881ad8388SMartin Matuska 		coder->sequence = SEQ_CRC32;
129*128836d3SXin LI 		FALLTHROUGH;
13081ad8388SMartin Matuska 
13181ad8388SMartin Matuska 	case SEQ_CRC32:
13281ad8388SMartin Matuska 		// We don't use the main loop, because we don't want
13381ad8388SMartin Matuska 		// coder->crc32 to be touched anymore.
13481ad8388SMartin Matuska 		do {
13581ad8388SMartin Matuska 			if (*out_pos == out_size)
13681ad8388SMartin Matuska 				return LZMA_OK;
13781ad8388SMartin Matuska 
13881ad8388SMartin Matuska 			out[*out_pos] = (coder->crc32 >> (coder->pos * 8))
13981ad8388SMartin Matuska 					& 0xFF;
14081ad8388SMartin Matuska 			++*out_pos;
14181ad8388SMartin Matuska 
14281ad8388SMartin Matuska 		} while (++coder->pos < 4);
14381ad8388SMartin Matuska 
14481ad8388SMartin Matuska 		return LZMA_STREAM_END;
14581ad8388SMartin Matuska 
14681ad8388SMartin Matuska 	default:
14781ad8388SMartin Matuska 		assert(0);
14881ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
14981ad8388SMartin Matuska 	}
15081ad8388SMartin Matuska 
15181ad8388SMartin Matuska out:
15281ad8388SMartin Matuska 	// Update the CRC32.
153c917796cSXin LI 	//
154c917796cSXin LI 	// Avoid null pointer + 0 (undefined behavior) in "out + out_start".
155c917796cSXin LI 	// In such a case we had no input and thus out_used == 0.
156c917796cSXin LI 	{
157c917796cSXin LI 		const size_t out_used = *out_pos - out_start;
158c917796cSXin LI 		if (out_used > 0)
15981ad8388SMartin Matuska 			coder->crc32 = lzma_crc32(out + out_start,
160c917796cSXin LI 					out_used, coder->crc32);
161c917796cSXin LI 	}
16281ad8388SMartin Matuska 
16381ad8388SMartin Matuska 	return ret;
16481ad8388SMartin Matuska }
16581ad8388SMartin Matuska 
16681ad8388SMartin Matuska 
16781ad8388SMartin Matuska static void
index_encoder_end(void * coder,const lzma_allocator * allocator)1681456f0f9SXin LI index_encoder_end(void *coder, const lzma_allocator *allocator)
16981ad8388SMartin Matuska {
17081ad8388SMartin Matuska 	lzma_free(coder, allocator);
17181ad8388SMartin Matuska 	return;
17281ad8388SMartin Matuska }
17381ad8388SMartin Matuska 
17481ad8388SMartin Matuska 
17581ad8388SMartin Matuska static void
index_encoder_reset(lzma_index_coder * coder,const lzma_index * i)1761456f0f9SXin LI index_encoder_reset(lzma_index_coder *coder, const lzma_index *i)
17781ad8388SMartin Matuska {
17881ad8388SMartin Matuska 	lzma_index_iter_init(&coder->iter, i);
17981ad8388SMartin Matuska 
18081ad8388SMartin Matuska 	coder->sequence = SEQ_INDICATOR;
18181ad8388SMartin Matuska 	coder->index = i;
18281ad8388SMartin Matuska 	coder->pos = 0;
18381ad8388SMartin Matuska 	coder->crc32 = 0;
18481ad8388SMartin Matuska 
18581ad8388SMartin Matuska 	return;
18681ad8388SMartin Matuska }
18781ad8388SMartin Matuska 
18881ad8388SMartin Matuska 
18981ad8388SMartin Matuska extern lzma_ret
lzma_index_encoder_init(lzma_next_coder * next,const lzma_allocator * allocator,const lzma_index * i)19053200025SRui Paulo lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
19181ad8388SMartin Matuska 		const lzma_index *i)
19281ad8388SMartin Matuska {
19381ad8388SMartin Matuska 	lzma_next_coder_init(&lzma_index_encoder_init, next, allocator);
19481ad8388SMartin Matuska 
19581ad8388SMartin Matuska 	if (i == NULL)
19681ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
19781ad8388SMartin Matuska 
19881ad8388SMartin Matuska 	if (next->coder == NULL) {
1991456f0f9SXin LI 		next->coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
20081ad8388SMartin Matuska 		if (next->coder == NULL)
20181ad8388SMartin Matuska 			return LZMA_MEM_ERROR;
20281ad8388SMartin Matuska 
20381ad8388SMartin Matuska 		next->code = &index_encode;
20481ad8388SMartin Matuska 		next->end = &index_encoder_end;
20581ad8388SMartin Matuska 	}
20681ad8388SMartin Matuska 
20781ad8388SMartin Matuska 	index_encoder_reset(next->coder, i);
20881ad8388SMartin Matuska 
20981ad8388SMartin Matuska 	return LZMA_OK;
21081ad8388SMartin Matuska }
21181ad8388SMartin Matuska 
21281ad8388SMartin Matuska 
21381ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_encoder(lzma_stream * strm,const lzma_index * i)21481ad8388SMartin Matuska lzma_index_encoder(lzma_stream *strm, const lzma_index *i)
21581ad8388SMartin Matuska {
21681ad8388SMartin Matuska 	lzma_next_strm_init(lzma_index_encoder_init, strm, i);
21781ad8388SMartin Matuska 
21881ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_RUN] = true;
219542aef48SMartin Matuska 	strm->internal->supported_actions[LZMA_FINISH] = true;
22081ad8388SMartin Matuska 
22181ad8388SMartin Matuska 	return LZMA_OK;
22281ad8388SMartin Matuska }
22381ad8388SMartin Matuska 
22481ad8388SMartin Matuska 
22581ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_buffer_encode(const lzma_index * i,uint8_t * out,size_t * out_pos,size_t out_size)22681ad8388SMartin Matuska lzma_index_buffer_encode(const lzma_index *i,
22781ad8388SMartin Matuska 		uint8_t *out, size_t *out_pos, size_t out_size)
22881ad8388SMartin Matuska {
22981ad8388SMartin Matuska 	// Validate the arguments.
23081ad8388SMartin Matuska 	if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size)
23181ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
23281ad8388SMartin Matuska 
23381ad8388SMartin Matuska 	// Don't try to encode if there's not enough output space.
23481ad8388SMartin Matuska 	if (out_size - *out_pos < lzma_index_size(i))
23581ad8388SMartin Matuska 		return LZMA_BUF_ERROR;
23681ad8388SMartin Matuska 
23781ad8388SMartin Matuska 	// The Index encoder needs just one small data structure so we can
23881ad8388SMartin Matuska 	// allocate it on stack.
2391456f0f9SXin LI 	lzma_index_coder coder;
24081ad8388SMartin Matuska 	index_encoder_reset(&coder, i);
24181ad8388SMartin Matuska 
24281ad8388SMartin Matuska 	// Do the actual encoding. This should never fail, but store
24381ad8388SMartin Matuska 	// the original *out_pos just in case.
24481ad8388SMartin Matuska 	const size_t out_start = *out_pos;
24581ad8388SMartin Matuska 	lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0,
24681ad8388SMartin Matuska 			out, out_pos, out_size, LZMA_RUN);
24781ad8388SMartin Matuska 
24881ad8388SMartin Matuska 	if (ret == LZMA_STREAM_END) {
24981ad8388SMartin Matuska 		ret = LZMA_OK;
25081ad8388SMartin Matuska 	} else {
25181ad8388SMartin Matuska 		// We should never get here, but just in case, restore the
25281ad8388SMartin Matuska 		// output position and set the error accordingly if something
25381ad8388SMartin Matuska 		// goes wrong and debugging isn't enabled.
25481ad8388SMartin Matuska 		assert(0);
25581ad8388SMartin Matuska 		*out_pos = out_start;
25681ad8388SMartin Matuska 		ret = LZMA_PROG_ERROR;
25781ad8388SMartin Matuska 	}
25881ad8388SMartin Matuska 
25981ad8388SMartin Matuska 	return ret;
26081ad8388SMartin Matuska }
261