xref: /freebsd/contrib/xz/src/liblzma/api/lzma/base.h (revision 81ad83880dcc267b198c781929dd9a009f98c5f7)
1 /**
2  * \file        lzma/base.h
3  * \brief       Data types and functions used in many places in liblzma API
4  */
5 
6 /*
7  * Author: Lasse Collin
8  *
9  * This file has been put into the public domain.
10  * You can do whatever you want with this file.
11  *
12  * See ../lzma.h for information about liblzma as a whole.
13  */
14 
15 #ifndef LZMA_H_INTERNAL
16 #	error Never include this file directly. Use <lzma.h> instead.
17 #endif
18 
19 
20 /**
21  * \brief       Boolean type (stored as one unsigned char)
22  *
23  * This is here because C89 doesn't have stdbool.h. To set a value for
24  * variables having type lzma_bool, you can use
25  *   - C99's `true' and `false' from stdbool.h;
26  *   - C++'s built-in `true' and `false'; or
27  *   - integers one (true) and zero (false).
28  */
29 typedef unsigned char lzma_bool;
30 
31 
32 /**
33  * \brief       Type of reserved enumeration variable in structures
34  *
35  * To avoid breaking the library ABI when new features are added, several
36  * structures contain extra variables that may be used in the future. Since
37  * sizeof(enum) can differ from sizeof(int), and sizeof(enum) may
38  * even vary depending on the range of enumeration constants, we specify
39  * a separate type to be used for reserved enumeration variables. All
40  * enumeration constants in the liblzma API will be non-negative and less
41  * than 128, which should guarantee that the ABI won't break even when
42  * new constants are added to existing enumerations.
43  */
44 typedef enum {
45 	LZMA_RESERVED_ENUM      = 0
46 } lzma_reserved_enum;
47 
48 
49 /**
50  * \brief       Return values used by several functions in liblzma
51  *
52  * Check the descriptions of specific functions to find out which return
53  * values they can return. With some functions the return values may have
54  * more specific meanings than described here; those differences are
55  * described on a per-function basis.
56  */
57 typedef enum {
58 	LZMA_OK                 = 0,
59 		/**<
60 		 * \brief       Operation completed successfully
61 		 */
62 
63 	LZMA_STREAM_END         = 1,
64 		/**<
65 		 * \brief       End of stream was reached
66 		 *
67 		 * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
68 		 * LZMA_FINISH was finished. In decoder, this indicates
69 		 * that all the data was successfully decoded.
70 		 *
71 		 * In all cases, when LZMA_STREAM_END is returned, the last
72 		 * output bytes should be picked from strm->next_out.
73 		 */
74 
75 	LZMA_NO_CHECK           = 2,
76 		/**<
77 		 * \brief       Input stream has no integrity check
78 		 *
79 		 * This return value can be returned only if the
80 		 * LZMA_TELL_NO_CHECK flag was used when initializing
81 		 * the decoder. LZMA_NO_CHECK is just a warning, and
82 		 * the decoding can be continued normally.
83 		 *
84 		 * It is possible to call lzma_get_check() immediately after
85 		 * lzma_code() has returned LZMA_NO_CHECK. The result will
86 		 * naturally be LZMA_CHECK_NONE, but the possibility to call
87 		 * lzma_get_check() may be convenient in some applications.
88 		 */
89 
90 	LZMA_UNSUPPORTED_CHECK  = 3,
91 		/**<
92 		 * \brief       Cannot calculate the integrity check
93 		 *
94 		 * The usage of this return value is different in encoders
95 		 * and decoders.
96 		 *
97 		 * Encoders can return this value only from the initialization
98 		 * function. If initialization fails with this value, the
99 		 * encoding cannot be done, because there's no way to produce
100 		 * output with the correct integrity check.
101 		 *
102 		 * Decoders can return this value only from lzma_code() and
103 		 * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
104 		 * initializing the decoder. The decoding can still be
105 		 * continued normally even if the check type is unsupported,
106 		 * but naturally the check will not be validated, and possible
107 		 * errors may go undetected.
108 		 *
109 		 * With decoder, it is possible to call lzma_get_check()
110 		 * immediately after lzma_code() has returned
111 		 * LZMA_UNSUPPORTED_CHECK. This way it is possible to find
112 		 * out what the unsupported Check ID was.
113 		 */
114 
115 	LZMA_GET_CHECK          = 4,
116 		/**<
117 		 * \brief       Integrity check type is now available
118 		 *
119 		 * This value can be returned only by the lzma_code() function
120 		 * and only if the decoder was initialized with the
121 		 * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
122 		 * application that it may now call lzma_get_check() to find
123 		 * out the Check ID. This can be used, for example, to
124 		 * implement a decoder that accepts only files that have
125 		 * strong enough integrity check.
126 		 */
127 
128 	LZMA_MEM_ERROR          = 5,
129 		/**<
130 		 * \brief       Cannot allocate memory
131 		 *
132 		 * Memory allocation failed, or the size of the allocation
133 		 * would be greater than SIZE_MAX.
134 		 *
135 		 * Due to internal implementation reasons, the coding cannot
136 		 * be continued even if more memory were made available after
137 		 * LZMA_MEM_ERROR.
138 		 */
139 
140 	LZMA_MEMLIMIT_ERROR     = 6,
141 		/**<
142 		 * \brief       Memory usage limit was reached
143 		 *
144 		 * Decoder would need more memory than allowed by the
145 		 * specified memory usage limit. To continue decoding,
146 		 * the memory usage limit has to be increased with
147 		 * lzma_memlimit_set().
148 		 */
149 
150 	LZMA_FORMAT_ERROR       = 7,
151 		/**<
152 		 * \brief       File format not recognized
153 		 *
154 		 * The decoder did not recognize the input as supported file
155 		 * format. This error can occur, for example, when trying to
156 		 * decode .lzma format file with lzma_stream_decoder,
157 		 * because lzma_stream_decoder accepts only the .xz format.
158 		 */
159 
160 	LZMA_OPTIONS_ERROR      = 8,
161 		/**<
162 		 * \brief       Invalid or unsupported options
163 		 *
164 		 * Invalid or unsupported options, for example
165 		 *  - unsupported filter(s) or filter options; or
166 		 *  - reserved bits set in headers (decoder only).
167 		 *
168 		 * Rebuilding liblzma with more features enabled, or
169 		 * upgrading to a newer version of liblzma may help.
170 		 */
171 
172 	LZMA_DATA_ERROR         = 9,
173 		/**<
174 		 * \brief       Data is corrupt
175 		 *
176 		 * The usage of this return value is different in encoders
177 		 * and decoders. In both encoder and decoder, the coding
178 		 * cannot continue after this error.
179 		 *
180 		 * Encoders return this if size limits of the target file
181 		 * format would be exceeded. These limits are huge, thus
182 		 * getting this error from an encoder is mostly theoretical.
183 		 * For example, the maximum compressed and uncompressed
184 		 * size of a .xz Stream is roughly 8 EiB (2^63 bytes).
185 		 *
186 		 * Decoders return this error if the input data is corrupt.
187 		 * This can mean, for example, invalid CRC32 in headers
188 		 * or invalid check of uncompressed data.
189 		 */
190 
191 	LZMA_BUF_ERROR          = 10,
192 		/**<
193 		 * \brief       No progress is possible
194 		 *
195 		 * This error code is returned when the coder cannot consume
196 		 * any new input and produce any new output. The most common
197 		 * reason for this error is that the input stream being
198 		 * decoded is truncated or corrupt.
199 		 *
200 		 * This error is not fatal. Coding can be continued normally
201 		 * by providing more input and/or more output space, if
202 		 * possible.
203 		 *
204 		 * Typically the first call to lzma_code() that can do no
205 		 * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
206 		 * the second consecutive call doing no progress will return
207 		 * LZMA_BUF_ERROR. This is intentional.
208 		 *
209 		 * With zlib, Z_BUF_ERROR may be returned even if the
210 		 * application is doing nothing wrong, so apps will need
211 		 * to handle Z_BUF_ERROR specially. The above hack
212 		 * guarantees that liblzma never returns LZMA_BUF_ERROR
213 		 * to properly written applications unless the input file
214 		 * is truncated or corrupt. This should simplify the
215 		 * applications a little.
216 		 */
217 
218 	LZMA_PROG_ERROR         = 11,
219 		/**<
220 		 * \brief       Programming error
221 		 *
222 		 * This indicates that the arguments given to the function are
223 		 * invalid or the internal state of the decoder is corrupt.
224 		 *   - Function arguments are invalid or the structures
225 		 *     pointed by the argument pointers are invalid
226 		 *     e.g. if strm->next_out has been set to NULL and
227 		 *     strm->avail_out > 0 when calling lzma_code().
228 		 *   - lzma_* functions have been called in wrong order
229 		 *     e.g. lzma_code() was called right after lzma_end().
230 		 *   - If errors occur randomly, the reason might be flaky
231 		 *     hardware.
232 		 *
233 		 * If you think that your code is correct, this error code
234 		 * can be a sign of a bug in liblzma. See the documentation
235 		 * for how to report bugs.
236 		 */
237 } lzma_ret;
238 
239 
240 /**
241  * \brief       The `action' argument for lzma_code()
242  *
243  * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
244  * the same `action' must be used until lzma_code() returns LZMA_STREAM_END.
245  * Also, the amount of input (that is, strm->avail_in) must not be modified
246  * by the application until lzma_code() returns LZMA_STREAM_END. Changing the
247  * `action' or modifying the amount of input will make lzma_code() return
248  * LZMA_PROG_ERROR.
249  */
250 typedef enum {
251 	LZMA_RUN = 0,
252 		/**<
253 		 * \brief       Continue coding
254 		 *
255 		 * Encoder: Encode as much input as possible. Some internal
256 		 * buffering will probably be done (depends on the filter
257 		 * chain in use), which causes latency: the input used won't
258 		 * usually be decodable from the output of the same
259 		 * lzma_code() call.
260 		 *
261 		 * Decoder: Decode as much input as possible and produce as
262 		 * much output as possible.
263 		 */
264 
265 	LZMA_SYNC_FLUSH = 1,
266 		/**<
267 		 * \brief       Make all the input available at output
268 		 *
269 		 * Normally the encoder introduces some latency.
270 		 * LZMA_SYNC_FLUSH forces all the buffered data to be
271 		 * available at output without resetting the internal
272 		 * state of the encoder. This way it is possible to use a
273 		 * compressed stream, for example, for communication over
274 		 * a network.
275 		 *
276 		 * Only some filters support LZMA_SYNC_FLUSH. Trying to use
277 		 * LZMA_SYNC_FLUSH with filters that don't support it will
278 		 * make lzma_code() return LZMA_OPTIONS_ERROR. For example,
279 		 * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
280 		 *
281 		 * Using LZMA_SYNC_FLUSH very often can dramatically reduce
282 		 * the compression ratio. With some filters (for example,
283 		 * LZMA2), fine-tuning the compression options may help
284 		 * mitigate this problem significantly.
285 		 *
286 		 * Decoders don't support LZMA_SYNC_FLUSH.
287 		 */
288 
289 	LZMA_FULL_FLUSH = 2,
290 		/**<
291 		 * \brief       Finish encoding of the current Block
292 		 *
293 		 * Finish encoding of the current Block. All the input
294 		 * data going to the current Block must have been given
295 		 * to the encoder (the last bytes can still be pending in
296 		 * next_in). Call lzma_code() with LZMA_FULL_FLUSH until
297 		 * it returns LZMA_STREAM_END. Then continue normally with
298 		 * LZMA_RUN or finish the Stream with LZMA_FINISH.
299 		 *
300 		 * This action is currently supported only by Stream encoder
301 		 * and easy encoder (which uses Stream encoder). If there is
302 		 * no unfinished Block, no empty Block is created.
303 		 */
304 
305 	LZMA_FINISH = 3
306 		/**<
307 		 * \brief       Finish the coding operation
308 		 *
309 		 * Finishes the coding operation. All the input data must
310 		 * have been given to the encoder (the last bytes can still
311 		 * be pending in next_in). Call lzma_code() with LZMA_FINISH
312 		 * until it returns LZMA_STREAM_END. Once LZMA_FINISH has
313 		 * been used, the amount of input must no longer be changed
314 		 * by the application.
315 		 *
316 		 * When decoding, using LZMA_FINISH is optional unless the
317 		 * LZMA_CONCATENATED flag was used when the decoder was
318 		 * initialized. When LZMA_CONCATENATED was not used, the only
319 		 * effect of LZMA_FINISH is that the amount of input must not
320 		 * be changed just like in the encoder.
321 		 */
322 } lzma_action;
323 
324 
325 /**
326  * \brief       Custom functions for memory handling
327  *
328  * A pointer to lzma_allocator may be passed via lzma_stream structure
329  * to liblzma, and some advanced functions take a pointer to lzma_allocator
330  * as a separate function argument. The library will use the functions
331  * specified in lzma_allocator for memory handling instead of the default
332  * malloc() and free(). C++ users should note that the custom memory
333  * handling functions must not throw exceptions.
334  *
335  * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
336  * OK to change these function pointers in the middle of the coding
337  * process, but obviously it must be done carefully to make sure that the
338  * replacement `free' can deallocate memory allocated by the earlier
339  * `alloc' function(s).
340  */
341 typedef struct {
342 	/**
343 	 * \brief       Pointer to a custom memory allocation function
344 	 *
345 	 * If you don't want a custom allocator, but still want
346 	 * custom free(), set this to NULL and liblzma will use
347 	 * the standard malloc().
348 	 *
349 	 * \param       opaque  lzma_allocator.opaque (see below)
350 	 * \param       nmemb   Number of elements like in calloc(). liblzma
351 	 *                      will always set nmemb to 1, so it is safe to
352 	 *                      ignore nmemb in a custom allocator if you like.
353 	 *                      The nmemb argument exists only for
354 	 *                      compatibility with zlib and libbzip2.
355 	 * \param       size    Size of an element in bytes.
356 	 *                      liblzma never sets this to zero.
357 	 *
358 	 * \return      Pointer to the beginning of a memory block of
359 	 *              `size' bytes, or NULL if allocation fails
360 	 *              for some reason. When allocation fails, functions
361 	 *              of liblzma return LZMA_MEM_ERROR.
362 	 *
363 	 * The allocator should not waste time zeroing the allocated buffers.
364 	 * This is not only about speed, but also memory usage, since the
365 	 * operating system kernel doesn't necessarily allocate the requested
366 	 * memory in physical memory until it is actually used. With small
367 	 * input files, liblzma may actually need only a fraction of the
368 	 * memory that it requested for allocation.
369 	 *
370 	 * \note        LZMA_MEM_ERROR is also used when the size of the
371 	 *              allocation would be greater than SIZE_MAX. Thus,
372 	 *              don't assume that the custom allocator must have
373 	 *              returned NULL if some function from liblzma
374 	 *              returns LZMA_MEM_ERROR.
375 	 */
376 	void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);
377 
378 	/**
379 	 * \brief       Pointer to a custom memory freeing function
380 	 *
381 	 * If you don't want a custom freeing function, but still
382 	 * want a custom allocator, set this to NULL and liblzma
383 	 * will use the standard free().
384 	 *
385 	 * \param       opaque  lzma_allocator.opaque (see below)
386 	 * \param       ptr     Pointer returned by lzma_allocator.alloc(),
387 	 *                      or when it is set to NULL, a pointer returned
388 	 *                      by the standard malloc().
389 	 */
390 	void (LZMA_API_CALL *free)(void *opaque, void *ptr);
391 
392 	/**
393 	 * \brief       Pointer passed to .alloc() and .free()
394 	 *
395 	 * opaque is passed as the first argument to lzma_allocator.alloc()
396 	 * and lzma_allocator.free(). This is intended to ease implementing
397 	 * custom memory allocation functions for use with liblzma.
398 	 *
399 	 * If you don't need this, you should set this to NULL.
400 	 */
401 	void *opaque;
402 
403 } lzma_allocator;
404 
405 
406 /**
407  * \brief       Internal data structure
408  *
409  * The contents of this structure are not visible outside the library.
410  */
411 typedef struct lzma_internal_s lzma_internal;
412 
413 
414 /**
415  * \brief       Passing data to and from liblzma
416  *
417  * The lzma_stream structure is used for
418  *  - passing pointers to input and output buffers to liblzma;
419  *  - defining custom memory handler functions; and
420  *  - holding a pointer to coder-specific internal data structures.
421  *
422  * Typical usage:
423  *
424  *  - After allocating lzma_stream (on stack or with malloc()), it must be
425  *    initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
426  *
427  *  - Initialize a coder to the lzma_stream, for example by using
428  *    lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
429  *      - In contrast to zlib, strm->next_in and strm->next_out are
430  *        ignored by all initialization functions, thus it is safe
431  *        to not initialize them yet.
432  *      - The initialization functions always set strm->total_in and
433  *        strm->total_out to zero.
434  *      - If the initialization function fails, no memory is left allocated
435  *        that would require freeing with lzma_end() even if some memory was
436  *        associated with the lzma_stream structure when the initialization
437  *        function was called.
438  *
439  *  - Use lzma_code() to do the actual work.
440  *
441  *  - Once the coding has been finished, the existing lzma_stream can be
442  *    reused. It is OK to reuse lzma_stream with a different initialization
443  *    function without calling lzma_end() first. Old allocations are
444  *    automatically freed.
445  *
446  *  - Finally, use lzma_end() to free the allocated memory. lzma_end() never
447  *    frees the lzma_stream structure itself.
448  *
449  * The application may modify the values of total_in and total_out as it wants.
450  * They are updated by liblzma to match the amount of data read and
451  * written, but aren't used for anything else.
452  */
453 typedef struct {
454 	const uint8_t *next_in; /**< Pointer to the next input byte. */
455 	size_t avail_in;    /**< Number of available input bytes in next_in. */
456 	uint64_t total_in;  /**< Total number of bytes read by liblzma. */
457 
458 	uint8_t *next_out;  /**< Pointer to the next output position. */
459 	size_t avail_out;   /**< Amount of free space in next_out. */
460 	uint64_t total_out; /**< Total number of bytes written by liblzma. */
461 
462 	/**
463 	 * \brief       Custom memory allocation functions
464 	 *
465 	 * In most cases this is NULL which makes liblzma use
466 	 * the standard malloc() and free().
467 	 */
468 	lzma_allocator *allocator;
469 
470 	/** Internal state is not visible to applications. */
471 	lzma_internal *internal;
472 
473 	/*
474 	 * Reserved space to allow possible future extensions without
475 	 * breaking the ABI. Excluding the initialization of this structure,
476 	 * you should not touch these, because the names of these variables
477 	 * may change.
478 	 */
479 	void *reserved_ptr1;
480 	void *reserved_ptr2;
481 	uint64_t reserved_int1;
482 	uint64_t reserved_int2;
483 	lzma_reserved_enum reserved_enum1;
484 	lzma_reserved_enum reserved_enum2;
485 
486 } lzma_stream;
487 
488 
489 /**
490  * \brief       Initialization for lzma_stream
491  *
492  * When you declare an instance of lzma_stream, you can immediately
493  * initialize it so that initialization functions know that no memory
494  * has been allocated yet:
495  *
496  *     lzma_stream strm = LZMA_STREAM_INIT;
497  *
498  * If you need to initialize a dynamically allocated lzma_stream, you can use
499  * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
500  * violates the C standard since NULL may have a different internal
501  * representation than zero, but it should be portable enough in practice.
502  * Anyway, for maximum portability, you can use something like this:
503  *
504  *     lzma_stream tmp = LZMA_STREAM_INIT;
505  *     *strm = tmp;
506  */
507 #define LZMA_STREAM_INIT \
508 	{ NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
509 	NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
510 
511 
512 /**
513  * \brief       Encode or decode data
514  *
515  * Once the lzma_stream has been successfully initialized (e.g. with
516  * lzma_stream_encoder()), the actual encoding or decoding is done
517  * using this function. The application has to update strm->next_in,
518  * strm->avail_in, strm->next_out, and strm->avail_out to pass input
519  * to and get output from liblzma.
520  *
521  * See the description of the coder-specific initialization function to find
522  * out which `action' values are supported by the coder.
523  */
524 extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
525 		lzma_nothrow lzma_attr_warn_unused_result;
526 
527 
528 /**
529  * \brief       Free memory allocated for the coder data structures
530  *
531  * \param       strm    Pointer to lzma_stream that is at least initialized
532  *                      with LZMA_STREAM_INIT.
533  *
534  * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
535  * members of the lzma_stream structure are touched.
536  *
537  * \note        zlib indicates an error if the application end()s an unfinished
538  *              stream structure. liblzma doesn't do this, and assumes that
539  *              the application knows what it is doing.
540  */
541 extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
542 
543 
544 /**
545  * \brief       Get the memory usage of the decoder filter chain
546  *
547  * This function is currently supported only when *strm has been initialized
548  * with a function that takes a memlimit argument. With other functions, you
549  * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
550  * to estimate the memory requirements.
551  *
552  * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
553  * the memory usage limit should have been to decode the input. Note that
554  * this may give misleading information if decoding .xz Streams that have
555  * multiple Blocks, because each Block can have different memory requirements.
556  *
557  * \return      Rough estimate of how much memory is currently allocated
558  *              for the filter decoders. If no filter chain is currently
559  *              allocated, some non-zero value is still returned, which is
560  *              less than or equal to what any filter chain would indicate
561  *              as its memory requirement.
562  *
563  *              If this function isn't supported by *strm or some other error
564  *              occurs, zero is returned.
565  */
566 extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
567 		lzma_nothrow lzma_attr_pure;
568 
569 
570 /**
571  * \brief       Get the current memory usage limit
572  *
573  * This function is supported only when *strm has been initialized with
574  * a function that takes a memlimit argument. See also lzma_memlimit_set().
575  *
576  * \return      On success, the current memory usage limit is returned
577  *              (always non-zero). On error, zero is returned.
578  */
579 extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
580 		lzma_nothrow lzma_attr_pure;
581 
582 
583 /**
584  * \brief       Set the memory usage limit
585  *
586  * This function is supported only when *strm has been initialized with
587  * a function that takes a memlimit argument. See also lzma_memlimit_get().
588  *
589  * \return      - LZMA_OK: New memory usage limit successfully set.
590  *              - LZMA_MEMLIMIT_ERROR: The new limit is too small.
591  *                The limit was not changed.
592  *              - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
593  *                support a memory usage limit or memlimit was zero.
594  */
595 extern LZMA_API(lzma_ret) lzma_memlimit_set(
596 		lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
597