1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file coder.c
6 /// \brief Compresses or uncompresses a file
7 //
8 // Authors: Lasse Collin
9 // Jia Tan
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "private.h"
14 #include "tuklib_integer.h"
15
16
/// Return value type for coder_init().
enum coder_init_ret {
	/// A normal liblzma-based coder was initialized.
	CODER_INIT_NORMAL,

	/// No coder was initialized; passthru mode should be used instead.
	/// coder_init() returns this only for an unrecognized input format.
	CODER_INIT_PASSTHRU,

	/// Initialization failed. coder_init() has already shown
	/// an error message when it returns this.
	CODER_INIT_ERROR,
};
23
24
/// Operation mode. Compression is the default.
enum operation_mode opt_mode = MODE_COMPRESS;

/// File format. When decompressing, FORMAT_AUTO makes coder_init()
/// detect the format from the beginning of the input. When compressing,
/// coder_init() asserts that this isn't FORMAT_AUTO.
enum format_type opt_format = FORMAT_AUTO;

/// If false (--no-adjust), coder_set_compression_settings() exits with
/// an error instead of switching to single-threaded mode or reducing
/// the LZMA1/LZMA2 dictionary size to meet the memory usage limit.
bool opt_auto_adjust = true;

/// If true (--single-stream), the decoders are initialized without
/// LZMA_CONCATENATED and input is allowed to remain after decoding.
bool opt_single_stream = false;

/// Block size for .xz encoding. Zero means that it hasn't been set, in
/// which case the multithreaded encoder setup derives a size from
/// the filter chains in use.
uint64_t opt_block_size = 0;

/// Entries from --block-list, or NULL if --block-list wasn't used.
block_list_entry *opt_block_list = NULL;

/// Largest Block size in --block-list. It is used as an upper bound for
/// the automatically chosen Block size of the multithreaded encoder.
/// Zero disables that bound.
uint64_t block_list_largest;

/// Bitmask of the filter chain numbers referenced by --block-list.
/// coder_set_compression_settings() compares this against the chains
/// that were actually specified.
uint32_t block_list_chain_mask;
33
/// Stream used to communicate with liblzma
static lzma_stream strm = LZMA_STREAM_INIT;

/// Maximum number of filter chains. The first filter chain is the default,
/// and 9 other filter chains can be specified with --filtersX.
#define NUM_FILTER_CHAIN_MAX 10

/// The default filter chain is in chains[0]. It is used for encoding
/// in all supported formats and also for decoding raw streams. The other
/// filter chains are set by --filtersX to support changing filters with
/// the --block-list option.
static lzma_filter chains[NUM_FILTER_CHAIN_MAX][LZMA_FILTERS_MAX + 1];

/// Bitmask indicating which filter chains are actually used when encoding
/// in the .xz format. This is needed since the filter chains specified using
/// --filtersX (or the default filter chain) might in reality be unneeded
/// if they are never used in --block-list. When --block-list isn't
/// specified, only the default filter chain is used, thus the initial
/// value of this variable is 1U << 0 (the number of the default chain is 0).
static uint32_t chains_used_mask = 1U << 0;

/// Input and output buffers
static io_buf in_buf;
static io_buf out_buf;

/// Number of filters in the default filter chain. Zero indicates that
/// we are using a preset.
static uint32_t filters_count = 0;

/// Number of the preset (0-9)
static uint32_t preset_number = LZMA_PRESET_DEFAULT;

/// True if the current default filter chain was set using the --filters
/// option. The filter chain is reset if a preset option (like -9) or an
/// old-style filter option (like --lzma2) is used after a --filters option.
static bool string_to_filter_used = false;

/// Integrity check type
static lzma_check check;

/// This becomes false if the --check=CHECK option is used.
static bool check_default = true;

/// Indicates if unconsumed input is allowed to remain after
/// decoding has successfully finished. This is set for each file
/// in coder_init().
static bool allow_trailing_input;

#ifdef MYTHREAD_ENABLED
/// Options for the multithreaded .xz encoder and decoder. Only the
/// members with fixed values are set here; the other members used in
/// this file (threads, block_size, check, filters, flags, and the
/// memlimit_* members) are filled in by coder_set_compression_settings()
/// and coder_init().
///
/// NOTE(review): liblzma documents lzma_mt.timeout in milliseconds;
/// confirm against the lzma_mt documentation before changing the value.
static lzma_mt mt_options = {
	.flags = 0,
	.timeout = 300,
};
#endif
88
89
90 extern void
coder_set_check(lzma_check new_check)91 coder_set_check(lzma_check new_check)
92 {
93 check = new_check;
94 check_default = false;
95 return;
96 }
97
98
99 static void
forget_filter_chain(void)100 forget_filter_chain(void)
101 {
102 // Setting a preset or using --filters makes us forget
103 // the earlier custom filter chain (if any).
104 if (filters_count > 0) {
105 lzma_filters_free(chains[0], NULL);
106 filters_count = 0;
107 }
108
109 string_to_filter_used = false;
110 return;
111 }
112
113
114 extern void
coder_set_preset(uint32_t new_preset)115 coder_set_preset(uint32_t new_preset)
116 {
117 preset_number &= ~LZMA_PRESET_LEVEL_MASK;
118 preset_number |= new_preset;
119 forget_filter_chain();
120 return;
121 }
122
123
124 extern void
coder_set_extreme(void)125 coder_set_extreme(void)
126 {
127 preset_number |= LZMA_PRESET_EXTREME;
128 forget_filter_chain();
129 return;
130 }
131
132
133 extern void
coder_add_filter(lzma_vli id,void * options)134 coder_add_filter(lzma_vli id, void *options)
135 {
136 if (filters_count == LZMA_FILTERS_MAX)
137 message_fatal(_("Maximum number of filters is four"));
138
139 if (string_to_filter_used)
140 forget_filter_chain();
141
142 chains[0][filters_count].id = id;
143 chains[0][filters_count].options = options;
144
145 // Terminate the filter chain with LZMA_VLI_UNKNOWN to simplify
146 // implementation of forget_filter_chain().
147 chains[0][++filters_count].id = LZMA_VLI_UNKNOWN;
148
149 // Setting a custom filter chain makes us forget the preset options.
150 // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
151 // where the custom filter chain resets the preset level back to
152 // the default 6, making the example equivalent to "xz -6e".
153 preset_number = LZMA_PRESET_DEFAULT;
154
155 return;
156 }
157
158
159 static void
str_to_filters(const char * str,uint32_t index,uint32_t flags)160 str_to_filters(const char *str, uint32_t index, uint32_t flags)
161 {
162 int error_pos;
163 const char *err = lzma_str_to_filters(str, &error_pos,
164 chains[index], flags, NULL);
165
166 if (err != NULL) {
167 char filter_num[2] = "";
168 if (index > 0)
169 filter_num[0] = '0' + index;
170
171 // liblzma doesn't translate the error messages but
172 // the messages are included in xz's translations.
173 message(V_ERROR, _("Error in --filters%s=FILTERS option:"),
174 filter_num);
175 message(V_ERROR, "%s", str);
176 message(V_ERROR, "%*s^", error_pos, "");
177 message_fatal("%s", _(err));
178 }
179 }
180
181
182 extern void
coder_add_filters_from_str(const char * filter_str)183 coder_add_filters_from_str(const char *filter_str)
184 {
185 // Forget presets and previously defined filter chain. See
186 // coder_add_filter() above for why preset_number must be reset too.
187 forget_filter_chain();
188 preset_number = LZMA_PRESET_DEFAULT;
189
190 string_to_filter_used = true;
191
192 // Include LZMA_STR_ALL_FILTERS so this can be used with --format=raw.
193 str_to_filters(filter_str, 0, LZMA_STR_ALL_FILTERS);
194
195 // Set the filters_count to be the number of filters converted from
196 // the string.
197 for (filters_count = 0; chains[0][filters_count].id
198 != LZMA_VLI_UNKNOWN;
199 ++filters_count) ;
200
201 assert(filters_count > 0);
202 return;
203 }
204
205
206 extern void
coder_add_block_filters(const char * str,size_t slot)207 coder_add_block_filters(const char *str, size_t slot)
208 {
209 // Free old filters first, if they were previously allocated.
210 if (chains_used_mask & (1U << slot))
211 lzma_filters_free(chains[slot], NULL);
212
213 str_to_filters(str, slot, 0);
214
215 chains_used_mask |= 1U << slot;
216 }
217
218
219 tuklib_attr_noreturn
220 static void
memlimit_too_small(uint64_t memory_usage)221 memlimit_too_small(uint64_t memory_usage)
222 {
223 message(V_ERROR, _("Memory usage limit is too low for the given "
224 "filter setup."));
225 message_mem_needed(V_ERROR, memory_usage);
226 tuklib_exit(E_ERROR, E_ERROR, false);
227 }
228
229
#ifdef HAVE_ENCODERS
/// \brief      Find the memory usage of every filter chain in use.
///
/// Only the chains whose bit is set in chains_used_mask are examined.
///
/// \param      chains_memusages    If non-NULL, the memory usage of each
///                                 examined chain is written to the slot
///                                 with the same index. Slots of unused
///                                 chains are left untouched.
/// \param      mt                  If non-NULL, the usage is calculated
///                                 for the multithreaded encoder using
///                                 a copy of these options.
/// \param      encode              True for encoder memory usage, false
///                                 for decoder memory usage. This must
///                                 be true if mt != NULL.
///
/// \return     The highest memory usage among the examined chains.
///             A chain whose usage cannot be determined counts as
///             UINT64_MAX.
static uint64_t
get_chains_memusage(uint64_t *chains_memusages, const lzma_mt *mt, bool encode)
{
#ifdef MYTHREAD_ENABLED
	// The "filters" member is pointed at each chain in turn, so work
	// on a private copy instead of modifying the caller's options.
	lzma_mt mt_local;
	if (mt != NULL)
		mt_local = *mt;
#else
	(void)mt;
#endif

	uint64_t highest = 0;

	for (uint32_t chain = 0; chain < ARRAY_SIZE(chains); ++chain) {
		const bool in_use = (chains_used_mask & (1U << chain)) != 0;
		if (!in_use)
			continue;

		// This value remains if no branch below is compiled in
		// for the requested mode.
		uint64_t usage = UINT64_MAX;

#ifdef MYTHREAD_ENABLED
		if (mt != NULL) {
			assert(encode);
			mt_local.filters = chains[chain];
			usage = lzma_stream_encoder_mt_memusage(&mt_local);
		} else
#endif
		if (encode) {
			usage = lzma_raw_encoder_memusage(chains[chain]);
		}
#ifdef HAVE_DECODERS
		else {
			usage = lzma_raw_decoder_memusage(chains[chain]);
		}
#endif

		if (chains_memusages != NULL)
			chains_memusages[chain] = usage;

		if (highest < usage)
			highest = usage;
	}

	return highest;
}
#endif
289
290
/// Validate the filter chains and make the settings fit the memory
/// usage limit.
///
/// In order, this function:
///   - picks the default integrity check if --check wasn't used;
///   - verifies that every chain referenced by --block-list was specified
///     and updates chains_used_mask accordingly;
///   - fills the default chain from the preset if no custom chain was set;
///   - rejects filter chains that the selected format or --flush-timeout
///     cannot handle;
///   - calculates the memory usage and, if it exceeds the limit, reduces
///     the number of threads, switches to single-threaded mode, and
///     finally lowers the LZMA1/LZMA2 dictionary sizes.
///
/// Problems that cannot be resolved end the program via message_fatal(),
/// message_bug(), or memlimit_too_small().
///
/// NOTE(review): The assert(opt_mode == MODE_COMPRESS) in the middle of
/// the function implies that the caller invokes this only when
/// compressing or when working with raw streams; confirm in the caller.
extern void
coder_set_compression_settings(void)
{
#ifdef HAVE_LZIP_DECODER
	// .lz compression isn't supported.
	assert(opt_format != FORMAT_LZIP);
#endif

	// The default check type is CRC64, but fall back to CRC32
	// if CRC64 isn't supported by the copy of liblzma we are
	// using. CRC32 is always supported.
	if (check_default) {
		check = LZMA_CHECK_CRC64;
		if (!lzma_check_is_supported(check))
			check = LZMA_CHECK_CRC32;
	}

#ifdef HAVE_ENCODERS
	if (opt_block_list != NULL) {
		// args.c ensures these.
		assert(opt_mode == MODE_COMPRESS);
		assert(opt_format == FORMAT_XZ);

		// Find out if block_list_chain_mask has a bit set that
		// isn't set in chains_used_mask.
		const uint32_t missing_chains_mask
				= (block_list_chain_mask ^ chains_used_mask)
				& block_list_chain_mask;

		// If a filter chain was specified in --block-list but no
		// matching --filtersX option was used, exit with an error.
		if (missing_chains_mask != 0) {
			// Get the number of the first missing filter chain
			// and show it in the error message.
			const unsigned first_missing
				= (unsigned)ctz32(missing_chains_mask);

			message_fatal(_("filter chain %u used by "
				"--block-list but not specified "
				"with --filters%u="),
				first_missing, first_missing);
		}

		// Omit the unused filter chains from mask of used chains.
		//
		// (FIXME? When built with debugging, coder_free() will free()
		// the filter chains (except the default chain) which makes
		// Valgrind show fewer reachable allocations. But coder_free()
		// uses this mask to determine which chains to free. Thus it
		// won't free the ones that are cleared here from the mask.
		// In practice this doesn't matter.)
		chains_used_mask &= block_list_chain_mask;
	} else {
		// Reset filters used mask in case --block-list is not
		// used, but --filtersX is used.
		chains_used_mask = 1U << 0;
	}
#endif

	// Options for LZMA1 or LZMA2 in case we are using a preset.
	// This is static because chains[0] keeps pointing to it after
	// this function has returned.
	static lzma_options_lzma opt_lzma;

	// The first filter in the chains[] array is for the default
	// filter chain.
	lzma_filter *default_filters = chains[0];

	if (filters_count == 0 && chains_used_mask & 1) {
		// We are using a preset. This is not a good idea in raw mode
		// except when playing around with things. Different versions
		// of this software may use different options in presets, and
		// thus make uncompressing the raw data difficult.
		if (opt_format == FORMAT_RAW) {
			// The message is shown only if warnings are allowed
			// but the exit status isn't changed.
			message(V_WARNING, _("Using a preset in raw mode "
				"is discouraged."));
			message(V_WARNING, _("The exact options of the "
				"presets may vary between software "
				"versions."));
		}

		// Get the preset for LZMA1 or LZMA2.
		if (lzma_lzma_preset(&opt_lzma, preset_number))
			message_bug();

		// Use LZMA2 except with --format=lzma we use LZMA1.
		default_filters[0].id = opt_format == FORMAT_LZMA
				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
		default_filters[0].options = &opt_lzma;

		filters_count = 1;

		// Terminate the filter options array.
		default_filters[1].id = LZMA_VLI_UNKNOWN;
	}

	// If we are using the .lzma format, allow exactly one filter
	// which has to be LZMA1. There is no need to check if the default
	// filter chain is being used since it can only be disabled if
	// --block-list is used, which is incompatible with FORMAT_LZMA.
	if (opt_format == FORMAT_LZMA && (filters_count != 1
			|| default_filters[0].id != LZMA_FILTER_LZMA1))
		message_fatal(_("The .lzma format supports only "
				"the LZMA1 filter"));

	// If we are using the .xz format, make sure that there is no LZMA1
	// filter to prevent LZMA_PROG_ERROR. With the chains from --filtersX
	// we have already ensured this by calling lzma_str_to_filters()
	// without setting the flags that would allow non-.xz filters.
	if (opt_format == FORMAT_XZ && chains_used_mask & 1)
		for (size_t i = 0; i < filters_count; ++i)
			if (default_filters[i].id == LZMA_FILTER_LZMA1)
				message_fatal(_("LZMA1 cannot be used "
						"with the .xz format"));

	if (chains_used_mask & 1) {
		// Print the selected default filter chain.
		message_filters_show(V_DEBUG, default_filters);
	}

	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
	// from the filter chain. Currently the threaded encoder doesn't
	// support LZMA_SYNC_FLUSH so single-threaded mode must be used.
	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
		for (unsigned i = 0; i < ARRAY_SIZE(chains); ++i) {
			if (!(chains_used_mask & (1U << i)))
				continue;

			// Only LZMA2 and Delta are accepted here; any other
			// filter in a used chain is a fatal error.
			const lzma_filter *fc = chains[i];
			for (size_t j = 0; fc[j].id != LZMA_VLI_UNKNOWN; j++) {
				switch (fc[j].id) {
				case LZMA_FILTER_LZMA2:
				case LZMA_FILTER_DELTA:
					break;

				default:
					message_fatal(_("Filter chain %u is "
							"incompatible with "
							"--flush-timeout"),
							i);
				}
			}
		}

		if (hardware_threads_is_mt()) {
			message(V_WARNING, _("Switching to single-threaded "
					"mode due to --flush-timeout"));
			hardware_threads_set(1);
		}
	}

	// Get memory limit and the memory usage of the used filter chains.
	// Note that if --format=raw was used, we can be decompressing
	// using the default filter chain.
	//
	// If multithreaded .xz compression is done, the memory limit
	// will be replaced.
	uint64_t memory_limit = hardware_memlimit_get(opt_mode);
	uint64_t memory_usage = UINT64_MAX;

#ifdef HAVE_ENCODERS
	// Memory usage for each encoder filter chain (default
	// or --filtersX). The encoder options may need to be
	// scaled down depending on the memory usage limit.
	//
	// Only the slots of the chains in use get a value from
	// get_chains_memusage(), so the slots of unused chains
	// must not be read.
	uint64_t encoder_memusages[ARRAY_SIZE(chains)];
#endif

	if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
# ifdef MYTHREAD_ENABLED
		if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
			memory_limit = hardware_memlimit_mtenc_get();
			mt_options.threads = hardware_threads_get();

			uint64_t block_size = opt_block_size;

			// If opt_block_size is not set, find the maximum
			// recommended Block size based on the filter chains
			if (block_size == 0) {
				for (unsigned i = 0; i < ARRAY_SIZE(chains);
						i++) {
					if (!(chains_used_mask & (1U << i)))
						continue;

					uint64_t size = lzma_mt_block_size(
							chains[i]);

					// If this returns an error, then one
					// of the filter chains in use is
					// invalid, so there is no point in
					// progressing further.
					if (size == UINT64_MAX)
						message_fatal(_("Unsupported "
							"options in filter "
							"chain %u"), i);

					if (size > block_size)
						block_size = size;
				}

				// If --block-list was used and our current
				// Block size exceeds the largest size
				// in --block-list, reduce the Block size of
				// the multithreaded encoder. The extra size
				// would only be a waste of RAM. With a
				// smaller Block size we might even be able
				// to use more threads in some cases.
				if (block_list_largest > 0 && block_size
						> block_list_largest)
					block_size = block_list_largest;
			}

			mt_options.block_size = block_size;
			mt_options.check = check;

			memory_usage = get_chains_memusage(encoder_memusages,
						&mt_options, true);
			if (memory_usage != UINT64_MAX)
				message(V_DEBUG, _("Using up to %" PRIu32
						" threads."),
						mt_options.threads);
		} else
# endif
		{
			memory_usage = get_chains_memusage(encoder_memusages,
					NULL, true);
		}
#endif
	} else {
#ifdef HAVE_DECODERS
		memory_usage = lzma_raw_decoder_memusage(default_filters);
#endif
	}

	// UINT64_MAX also remains here if the needed encoder or decoder
	// support wasn't compiled in.
	if (memory_usage == UINT64_MAX)
		message_fatal(_("Unsupported filter chain or filter options"));

	// Print memory usage info before possible dictionary
	// size auto-adjusting.
	//
	// NOTE: If only encoder support was built, we cannot show
	// what the decoder memory usage will be.
	message_mem_needed(V_DEBUG, memory_usage);

#if defined(HAVE_ENCODERS) && defined(HAVE_DECODERS)
	if (opt_mode == MODE_COMPRESS && message_verbosity_get() >= V_DEBUG) {
		const uint64_t decmem = get_chains_memusage(NULL, NULL, false);
		if (decmem != UINT64_MAX)
			message(V_DEBUG, _("Decompression will need "
				"%s MiB of memory."), uint64_to_str(
					round_up_to_mib(decmem), 0));
	}
#endif

	// Nothing needs to be adjusted if the limit is already met.
	if (memory_usage <= memory_limit)
		return;

	// With --format=raw settings are never adjusted to meet
	// the memory usage limit.
	if (opt_format == FORMAT_RAW)
		memlimit_too_small(memory_usage);

	assert(opt_mode == MODE_COMPRESS);

#ifdef HAVE_ENCODERS
# ifdef MYTHREAD_ENABLED
	if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
		// Try to reduce the number of threads before
		// adjusting the compression settings down.
		while (mt_options.threads > 1) {
			// Reduce the number of threads by one and check
			// the memory usage.
			--mt_options.threads;
			memory_usage = get_chains_memusage(encoder_memusages,
					&mt_options, true);
			if (memory_usage == UINT64_MAX)
				message_bug();

			if (memory_usage <= memory_limit) {
				// The memory usage is now low enough.
				//
				// Since 5.6.1: This is only shown at
				// V_DEBUG instead of V_WARNING because
				// changing the number of threads doesn't
				// affect the output. On some systems this
				// message would be too common now that
				// multithreaded compression is the default.
				message(V_DEBUG, _("Reduced the number of "
					"threads from %s to %s to not exceed "
					"the memory usage limit of %s MiB"),
					uint64_to_str(
						hardware_threads_get(), 0),
					uint64_to_str(mt_options.threads, 1),
					uint64_to_str(round_up_to_mib(
						memory_limit), 2));
				return;
			}
		}

		// If the memory usage limit is only a soft limit (automatic
		// number of threads and no --memlimit-compress), the limit
		// is only used to reduce the number of threads and once at
		// just one thread, the limit is completely ignored. This
		// way -T0 won't use insane amount of memory but at the same
		// time the soft limit will never make xz fail and never make
		// xz change settings that would affect the compressed output.
		//
		// Since 5.6.1: Like above, this is now shown at V_DEBUG
		// instead of V_WARNING.
		if (hardware_memlimit_mtenc_is_default()) {
			message(V_DEBUG, _("Reduced the number of threads "
				"from %s to one. The automatic memory usage "
				"limit of %s MiB is still being exceeded. "
				"%s MiB of memory is required. "
				"Continuing anyway."),
				uint64_to_str(hardware_threads_get(), 0),
				uint64_to_str(
					round_up_to_mib(memory_limit), 1),
				uint64_to_str(
					round_up_to_mib(memory_usage), 2));
			return;
		}

		// If --no-adjust was used, we cannot drop to single-threaded
		// mode since it produces different compressed output.
		//
		// NOTE: In xz 5.2.x, --no-adjust also prevented reducing
		// the number of threads. This changed in 5.3.3alpha.
		if (!opt_auto_adjust)
			memlimit_too_small(memory_usage);

		// Switch to single-threaded mode. It uses
		// less memory than using one thread in
		// the multithreaded mode but the output
		// is also different.
		hardware_threads_set(1);
		memory_usage = get_chains_memusage(encoder_memusages,
				NULL, true);
		message(V_WARNING, _("Switching to single-threaded mode "
			"to not exceed the memory usage limit of %s MiB"),
			uint64_to_str(round_up_to_mib(memory_limit), 0));
	}
# endif

	// Switching to single-threaded mode above may have been enough.
	if (memory_usage <= memory_limit)
		return;

	// Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
	// was specified as that would change the compressed output.
	if (!opt_auto_adjust)
		memlimit_too_small(memory_usage);

	// Adjust each filter chain that is exceeding the memory usage limit.
	for (unsigned i = 0; i < ARRAY_SIZE(chains); i++) {
		// Skip unused chains.
		if (!(chains_used_mask & (1U << i)))
			continue;

		// Skip chains that already meet the memory usage limit.
		if (encoder_memusages[i] <= memory_limit)
			continue;

		// Look for the last filter if it is LZMA2 or LZMA1, so we
		// can make it use less RAM. We cannot adjust other filters.
		unsigned j = 0;
		while (chains[i][j].id != LZMA_FILTER_LZMA2
				&& chains[i][j].id != LZMA_FILTER_LZMA1) {
			// NOTE: This displays the too high limit of this
			// particular filter chain. If multiple chains are
			// specified and another one would need more then
			// this message could be confusing. As long as LZMA2
			// is the only memory hungry filter in .xz this
			// doesn't matter at all in practice.
			//
			// FIXME? However, it's sort of odd still if we had
			// switched from multithreaded mode to single-threaded
			// mode because single-threaded produces different
			// output. So the messages could perhaps be clearer.
			// Another case of this is a few lines below.
			if (chains[i][j].id == LZMA_VLI_UNKNOWN)
				memlimit_too_small(encoder_memusages[i]);

			++j;
		}

		// Decrease the dictionary size until we meet the memory
		// usage limit. First round down to full mebibytes.
		lzma_options_lzma *opt = chains[i][j].options;
		const uint32_t orig_dict_size = opt->dict_size;
		opt->dict_size &= ~((UINT32_C(1) << 20) - 1);

		while (true) {
			// If it is below 1 MiB, auto-adjusting failed.
			//
			// FIXME? See the FIXME a few lines above.
			if (opt->dict_size < (UINT32_C(1) << 20))
				memlimit_too_small(encoder_memusages[i]);

			encoder_memusages[i]
				= lzma_raw_encoder_memusage(chains[i]);
			if (encoder_memusages[i] == UINT64_MAX)
				message_bug();

			// Accept it if it is low enough.
			if (encoder_memusages[i] <= memory_limit)
				break;

			// Otherwise adjust it 1 MiB down and try again.
			opt->dict_size -= UINT32_C(1) << 20;
		}

		// Tell the user that we decreased the dictionary size.
		// The message is slightly different between the default
		// filter chain (0) and the chains from --filtersX.
		//
		// The last argument of each uint64_to_str() call differs
		// because all three strings are used in the same message.
		// NOTE(review): presumably it selects an internal buffer
		// slot; confirm in uint64_to_str().
		const char lzma_num = chains[i][j].id == LZMA_FILTER_LZMA2
					? '2' : '1';
		const char *from_size = uint64_to_str(orig_dict_size >> 20, 0);
		const char *to_size = uint64_to_str(opt->dict_size >> 20, 1);
		const char *limit_size = uint64_to_str(round_up_to_mib(
					memory_limit), 2);
		if (i == 0)
			message(V_WARNING, _("Adjusted LZMA%c dictionary "
				"size from %s MiB to %s MiB to not exceed the "
				"memory usage limit of %s MiB"),
				lzma_num, from_size, to_size, limit_size);
		else
			message(V_WARNING, _("Adjusted LZMA%c dictionary size "
				"for --filters%u from %s MiB to %s MiB to not "
				"exceed the memory usage limit of %s MiB"),
				lzma_num, i, from_size, to_size, limit_size);
	}
#endif

	return;
}
726
727
728 #ifdef HAVE_DECODERS
729 /// Return true if the data in in_buf seems to be in the .xz format.
730 static bool
is_format_xz(void)731 is_format_xz(void)
732 {
733 // Specify the magic as hex to be compatible with EBCDIC systems.
734 static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
735 return strm.avail_in >= sizeof(magic)
736 && memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
737 }
738
739
740 /// Return true if the data in in_buf seems to be in the .lzma format.
741 static bool
is_format_lzma(void)742 is_format_lzma(void)
743 {
744 // The .lzma header is 13 bytes.
745 if (strm.avail_in < 13)
746 return false;
747
748 // Decode the LZMA1 properties.
749 lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
750 if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
751 return false;
752
753 // A hack to ditch tons of false positives: We allow only dictionary
754 // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
755 // created only files with 2^n, but accepts any dictionary size.
756 // If someone complains, this will be reconsidered.
757 lzma_options_lzma *opt = filter.options;
758 const uint32_t dict_size = opt->dict_size;
759 free(opt);
760
761 if (dict_size != UINT32_MAX) {
762 uint32_t d = dict_size - 1;
763 d |= d >> 2;
764 d |= d >> 3;
765 d |= d >> 4;
766 d |= d >> 8;
767 d |= d >> 16;
768 ++d;
769 if (d != dict_size || dict_size == 0)
770 return false;
771 }
772
773 // Another hack to ditch false positives: Assume that if the
774 // uncompressed size is known, it must be less than 256 GiB.
775 // Again, if someone complains, this will be reconsidered.
776 uint64_t uncompressed_size = 0;
777 for (size_t i = 0; i < 8; ++i)
778 uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
779
780 if (uncompressed_size != UINT64_MAX
781 && uncompressed_size > (UINT64_C(1) << 38))
782 return false;
783
784 return true;
785 }
786
787
#ifdef HAVE_LZIP_DECODER
/// Check if in_buf begins with the .lz magic bytes.
///
/// \return     True if at least four bytes of input are available
///             (strm.avail_in) and they match the .lz magic bytes.
static bool
is_format_lzip(void)
{
	// The magic bytes are given in hex, like in is_format_xz().
	static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };

	if (strm.avail_in < sizeof(lzip_magic))
		return false;

	return memcmp(in_buf.u8, lzip_magic, sizeof(lzip_magic)) == 0;
}
#endif
798 #endif
799
800
/// Detect the input file type (for now, this is done only when
/// decompressing), and initialize an appropriate coder. Return value
/// indicates if a normal liblzma-based coder was initialized
/// (CODER_INIT_NORMAL), if passthru mode should be used
/// (CODER_INIT_PASSTHRU), or if an error occurred (CODER_INIT_ERROR).
///
/// When decoding anything but raw streams, the headers are also decoded
/// here so that a too low memory usage limit or an unsupported check type
/// is detected early.
///
/// \param      pair    File being processed. In this function it is used
///                     only for pair->src_name in messages.
///
/// NOTE(review): The format detection reads strm.avail_in and in_buf,
/// so presumably the caller has already read the beginning of the input
/// into in_buf and set up strm before calling this; confirm in the caller.
static enum coder_init_ret
coder_init(file_pair *pair)
{
	// This value remains if no branch below sets ret, for example,
	// when the needed encoder or decoder support isn't compiled in.
	lzma_ret ret = LZMA_PROG_ERROR;

	// In most cases if there is input left when coding finishes,
	// something has gone wrong. Exceptions are --single-stream
	// and decoding .lz files which can contain trailing non-.lz data.
	// These will be handled later in this function.
	allow_trailing_input = false;

	// Set the first filter chain. If the --block-list option is not
	// used then use the default filter chain (chains[0]).
	// Otherwise, use first filter chain from the block list.
	lzma_filter *active_filters = opt_block_list == NULL
			? chains[0]
			: chains[opt_block_list[0].chain_num];

	if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
		switch (opt_format) {
		case FORMAT_AUTO:
			// args.c ensures this.
			assert(0);
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			mt_options.filters = active_filters;
			if (hardware_threads_is_mt())
				ret = lzma_stream_encoder_mt(
						&strm, &mt_options);
			else
#	endif
				ret = lzma_stream_encoder(
						&strm, active_filters, check);
			break;

		case FORMAT_LZMA:
			// Only the options of the first filter are passed;
			// coder_set_compression_settings() checks that
			// the chain is a single LZMA1 filter with .lzma.
			ret = lzma_alone_encoder(&strm,
					active_filters[0].options);
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			// args.c should disallow this.
			assert(0);
			ret = LZMA_PROG_ERROR;
			break;
#	endif

		case FORMAT_RAW:
			ret = lzma_raw_encoder(&strm, active_filters);
			break;
		}
#endif
	} else {
#ifdef HAVE_DECODERS
		uint32_t flags = 0;

		// It seems silly to warn about unsupported check if the
		// check won't be verified anyway due to --ignore-check.
		if (opt_ignore_check)
			flags |= LZMA_IGNORE_CHECK;
		else
			flags |= LZMA_TELL_UNSUPPORTED_CHECK;

		if (opt_single_stream)
			allow_trailing_input = true;
		else
			flags |= LZMA_CONCATENATED;

		// We abuse FORMAT_AUTO to indicate unknown file format,
		// for which we may consider passthru mode.
		enum format_type init_format = FORMAT_AUTO;

		// If a specific format was requested, the input still has to
		// look like that format. Only raw is accepted without checks.
		switch (opt_format) {
		case FORMAT_AUTO:
			// .lz is checked before .lzma since .lzma detection
			// is more complicated (no magic bytes).
			if (is_format_xz())
				init_format = FORMAT_XZ;
#	ifdef HAVE_LZIP_DECODER
			else if (is_format_lzip())
				init_format = FORMAT_LZIP;
#	endif
			else if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

		case FORMAT_XZ:
			if (is_format_xz())
				init_format = FORMAT_XZ;
			break;

		case FORMAT_LZMA:
			if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			if (is_format_lzip())
				init_format = FORMAT_LZIP;
			break;
#	endif

		case FORMAT_RAW:
			init_format = FORMAT_RAW;
			break;
		}

		switch (init_format) {
		case FORMAT_AUTO:
			// Unknown file format. If --decompress --stdout
			// --force have been given, then we copy the input
			// as is to stdout. Checking for MODE_DECOMPRESS
			// is needed, because we don't want to use
			// passthru mode with --test.
			if (opt_mode == MODE_DECOMPRESS
					&& opt_stdout && opt_force) {
				// These are needed for progress info.
				strm.total_in = 0;
				strm.total_out = 0;
				return CODER_INIT_PASSTHRU;
			}

			ret = LZMA_FORMAT_ERROR;
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			mt_options.flags = flags;

			mt_options.threads = hardware_threads_get();
			mt_options.memlimit_stop
				= hardware_memlimit_get(MODE_DECOMPRESS);

			// If single-threaded mode was requested, set the
			// memlimit for threading to zero. This forces the
			// decoder to use single-threaded mode which matches
			// the behavior of lzma_stream_decoder().
			//
			// Otherwise use the limit for threaded decompression
			// which has a sane default (users are still free to
			// make it insanely high though).
			mt_options.memlimit_threading
					= mt_options.threads == 1
					? 0 : hardware_memlimit_mtdec_get();

			ret = lzma_stream_decoder_mt(&strm, &mt_options);
#	else
			ret = lzma_stream_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
#	endif
			break;

		case FORMAT_LZMA:
			ret = lzma_alone_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS));
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			// .lz files can contain trailing non-.lz data.
			allow_trailing_input = true;
			ret = lzma_lzip_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
			break;
#	endif

		case FORMAT_RAW:
			// Memory usage has already been checked in
			// coder_set_compression_settings().
			ret = lzma_raw_decoder(&strm, active_filters);
			break;
		}

		// Try to decode the headers. This will catch too low
		// memory usage limit in case it happens in the first
		// Block of the first Stream, which is where it very
		// probably will happen if it is going to happen.
		//
		// This will also catch unsupported check type which
		// we treat as a warning only. If there are empty
		// concatenated Streams with unsupported check type then
		// the message can be shown more than once here. The loop
		// is used in case there is first a warning about
		// unsupported check type and then the first Block
		// would exceed the memlimit.
		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
			// No output buffer is provided for this call, so
			// no uncompressed data can be produced yet.
			strm.next_out = NULL;
			strm.avail_out = 0;
			while ((ret = lzma_code(&strm, LZMA_RUN))
					== LZMA_UNSUPPORTED_CHECK)
				message_warning(_("%s: %s"),
					tuklib_mask_nonprint(pair->src_name),
					message_strm(ret));

			// With --single-stream lzma_code won't wait for
			// LZMA_FINISH and thus it can return LZMA_STREAM_END
			// if the file has no uncompressed data inside.
			// So treat LZMA_STREAM_END as LZMA_OK here.
			// When lzma_code() is called again in coder_normal()
			// it will return LZMA_STREAM_END again.
			if (ret == LZMA_STREAM_END)
				ret = LZMA_OK;
		}
#endif
	}

	if (ret != LZMA_OK) {
		message_error(_("%s: %s"),
				tuklib_mask_nonprint(pair->src_name),
				message_strm(ret));

		// With a too low memory usage limit, also tell
		// how much memory would have been needed.
		if (ret == LZMA_MEMLIMIT_ERROR)
			message_mem_needed(V_ERROR, lzma_memusage(&strm));

		return CODER_INIT_ERROR;
	}

	return CODER_INIT_NORMAL;
}
1031
1032
1033 #ifdef HAVE_ENCODERS
1034 /// Resolve conflicts between opt_block_size and opt_block_list in single
1035 /// threaded mode. We want to default to opt_block_list, except when it is
1036 /// larger than opt_block_size. If this is the case for the current Block
1037 /// at *list_pos, then we break into smaller Blocks. Otherwise advance
1038 /// to the next Block in opt_block_list, and break apart if needed.
1039 static void
split_block(uint64_t * block_remaining,uint64_t * next_block_remaining,size_t * list_pos)1040 split_block(uint64_t *block_remaining,
1041 uint64_t *next_block_remaining,
1042 size_t *list_pos)
1043 {
1044 if (*next_block_remaining > 0) {
1045 // The Block at *list_pos has previously been split up.
1046 assert(!hardware_threads_is_mt());
1047 assert(opt_block_size > 0);
1048 assert(opt_block_list != NULL);
1049
1050 if (*next_block_remaining > opt_block_size) {
1051 // We have to split the current Block at *list_pos
1052 // into another opt_block_size length Block.
1053 *block_remaining = opt_block_size;
1054 } else {
1055 // This is the last remaining split Block for the
1056 // Block at *list_pos.
1057 *block_remaining = *next_block_remaining;
1058 }
1059
1060 *next_block_remaining -= *block_remaining;
1061
1062 } else {
1063 // The Block at *list_pos has been finished. Go to the next
1064 // entry in the list. If the end of the list has been
1065 // reached, reuse the size and filters of the last Block.
1066 if (opt_block_list[*list_pos + 1].size != 0) {
1067 ++*list_pos;
1068
1069 // Update the filters if needed.
1070 if (opt_block_list[*list_pos - 1].chain_num
1071 != opt_block_list[*list_pos].chain_num) {
1072 const unsigned chain_num
1073 = opt_block_list[*list_pos].chain_num;
1074 const lzma_filter *next = chains[chain_num];
1075 const lzma_ret ret = lzma_filters_update(
1076 &strm, next);
1077
1078 if (ret != LZMA_OK) {
1079 // This message is only possible if
1080 // the filter chain has unsupported
1081 // options since the filter chain is
1082 // validated using
1083 // lzma_raw_encoder_memusage() or
1084 // lzma_stream_encoder_mt_memusage().
1085 // Some options are not validated until
1086 // the encoders are initialized.
1087 message_fatal(
1088 _("Error changing to "
1089 "filter chain %u: %s"),
1090 chain_num,
1091 message_strm(ret));
1092 }
1093 }
1094 }
1095
1096 *block_remaining = opt_block_list[*list_pos].size;
1097
1098 // If in single-threaded mode, split up the Block if needed.
1099 // This is not needed in multi-threaded mode because liblzma
1100 // will do this due to how threaded encoding works.
1101 if (!hardware_threads_is_mt() && opt_block_size > 0
1102 && *block_remaining > opt_block_size) {
1103 *next_block_remaining
1104 = *block_remaining - opt_block_size;
1105 *block_remaining = opt_block_size;
1106 }
1107 }
1108 }
1109 #endif
1110
1111
1112 static bool
coder_write_output(file_pair * pair)1113 coder_write_output(file_pair *pair)
1114 {
1115 if (opt_mode != MODE_TEST) {
1116 if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
1117 return true;
1118 }
1119
1120 strm.next_out = out_buf.u8;
1121 strm.avail_out = IO_BUFFER_SIZE;
1122 return false;
1123 }
1124
1125
1126 /// Compress or decompress using liblzma.
1127 static bool
coder_normal(file_pair * pair)1128 coder_normal(file_pair *pair)
1129 {
1130 // Encoder needs to know when we have given all the input to it.
1131 // The decoders need to know it too when we are using
1132 // LZMA_CONCATENATED. We need to check for src_eof here, because
1133 // the first input chunk has been already read if decompressing,
1134 // and that may have been the only chunk we will read.
1135 lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
1136
1137 lzma_ret ret;
1138
1139 // Assume that something goes wrong.
1140 bool success = false;
1141
1142 #ifdef HAVE_ENCODERS
1143 // block_remaining indicates how many input bytes to encode before
1144 // finishing the current .xz Block. The Block size is set with
1145 // --block-size=SIZE and --block-list. They have an effect only when
1146 // compressing to the .xz format. If block_remaining == UINT64_MAX,
1147 // only a single block is created.
1148 uint64_t block_remaining = UINT64_MAX;
1149
1150 // next_block_remaining for when we are in single-threaded mode and
1151 // the Block in --block-list is larger than the --block-size=SIZE.
1152 uint64_t next_block_remaining = 0;
1153
1154 // Position in opt_block_list. Unused if --block-list wasn't used.
1155 size_t list_pos = 0;
1156
1157 // Handle --block-size for single-threaded mode and the first step
1158 // of --block-list.
1159 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
1160 // --block-size doesn't do anything here in threaded mode,
1161 // because the threaded encoder will take care of splitting
1162 // to fixed-sized Blocks.
1163 if (!hardware_threads_is_mt() && opt_block_size > 0)
1164 block_remaining = opt_block_size;
1165
1166 // If --block-list was used, start with the first size.
1167 //
1168 // For threaded case, --block-size specifies how big Blocks
1169 // the encoder needs to be prepared to create at maximum
1170 // and --block-list will simultaneously cause new Blocks
1171 // to be started at specified intervals. To keep things
1172 // logical, the same is done in single-threaded mode. The
1173 // output is still not identical because in single-threaded
1174 // mode the size info isn't written into Block Headers.
1175 if (opt_block_list != NULL) {
1176 if (block_remaining < opt_block_list[list_pos].size) {
1177 assert(!hardware_threads_is_mt());
1178 next_block_remaining =
1179 opt_block_list[list_pos].size
1180 - block_remaining;
1181 } else {
1182 block_remaining =
1183 opt_block_list[list_pos].size;
1184 }
1185 }
1186 }
1187 #endif
1188
1189 strm.next_out = out_buf.u8;
1190 strm.avail_out = IO_BUFFER_SIZE;
1191
1192 while (!user_abort) {
1193 // Fill the input buffer if it is empty and we aren't
1194 // flushing or finishing.
1195 if (strm.avail_in == 0 && action == LZMA_RUN) {
1196 strm.next_in = in_buf.u8;
1197 #ifdef HAVE_ENCODERS
1198 const size_t read_size = my_min(block_remaining,
1199 IO_BUFFER_SIZE);
1200 #else
1201 const size_t read_size = IO_BUFFER_SIZE;
1202 #endif
1203 strm.avail_in = io_read(pair, &in_buf, read_size);
1204
1205 if (strm.avail_in == SIZE_MAX)
1206 break;
1207
1208 if (pair->src_eof) {
1209 action = LZMA_FINISH;
1210 }
1211 #ifdef HAVE_ENCODERS
1212 else if (block_remaining != UINT64_MAX) {
1213 // Start a new Block after every
1214 // opt_block_size bytes of input.
1215 block_remaining -= strm.avail_in;
1216 if (block_remaining == 0)
1217 action = LZMA_FULL_BARRIER;
1218 }
1219
1220 if (action == LZMA_RUN && pair->flush_needed)
1221 action = LZMA_SYNC_FLUSH;
1222 #endif
1223 }
1224
1225 // Let liblzma do the actual work.
1226 ret = lzma_code(&strm, action);
1227
1228 // Write out if the output buffer became full.
1229 if (strm.avail_out == 0) {
1230 if (coder_write_output(pair))
1231 break;
1232 }
1233
1234 #ifdef HAVE_ENCODERS
1235 if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
1236 || action == LZMA_FULL_BARRIER)) {
1237 if (action == LZMA_SYNC_FLUSH) {
1238 // Flushing completed. Write the pending data
1239 // out immediately so that the reading side
1240 // can decompress everything compressed so far.
1241 if (coder_write_output(pair))
1242 break;
1243
1244 // Mark that we haven't seen any new input
1245 // since the previous flush.
1246 pair->src_has_seen_input = false;
1247 pair->flush_needed = false;
1248 } else {
1249 // Start a new Block after LZMA_FULL_BARRIER.
1250 if (opt_block_list == NULL) {
1251 assert(!hardware_threads_is_mt());
1252 assert(opt_block_size > 0);
1253 block_remaining = opt_block_size;
1254 } else {
1255 split_block(&block_remaining,
1256 &next_block_remaining,
1257 &list_pos);
1258 }
1259 }
1260
1261 // Start a new Block after LZMA_FULL_FLUSH or continue
1262 // the same block after LZMA_SYNC_FLUSH.
1263 action = LZMA_RUN;
1264 } else
1265 #endif
1266 if (ret != LZMA_OK) {
1267 // Determine if the return value indicates that we
1268 // won't continue coding. LZMA_NO_CHECK would be
1269 // here too if LZMA_TELL_ANY_CHECK was used.
1270 const bool stop = ret != LZMA_UNSUPPORTED_CHECK;
1271
1272 if (stop) {
1273 // Write the remaining bytes even if something
1274 // went wrong, because that way the user gets
1275 // as much data as possible, which can be good
1276 // when trying to get at least some useful
1277 // data out of damaged files.
1278 if (coder_write_output(pair))
1279 break;
1280 }
1281
1282 if (ret == LZMA_STREAM_END) {
1283 if (allow_trailing_input) {
1284 io_fix_src_pos(pair, strm.avail_in);
1285 success = true;
1286 break;
1287 }
1288
1289 // Check that there is no trailing garbage.
1290 // This is needed for LZMA_Alone and raw
1291 // streams. This is *not* done with .lz files
1292 // as that format specifically requires
1293 // allowing trailing garbage.
1294 if (strm.avail_in == 0 && !pair->src_eof) {
1295 // Try reading one more byte.
1296 // Hopefully we don't get any more
1297 // input, and thus pair->src_eof
1298 // becomes true.
1299 strm.avail_in = io_read(
1300 pair, &in_buf, 1);
1301 if (strm.avail_in == SIZE_MAX)
1302 break;
1303
1304 assert(strm.avail_in == 0
1305 || strm.avail_in == 1);
1306 }
1307
1308 if (strm.avail_in == 0) {
1309 assert(pair->src_eof);
1310 success = true;
1311 break;
1312 }
1313
1314 // We hadn't reached the end of the file.
1315 ret = LZMA_DATA_ERROR;
1316 assert(stop);
1317 }
1318
1319 // If we get here and stop is true, something went
1320 // wrong and we print an error. Otherwise it's just
1321 // a warning and coding can continue.
1322 if (stop) {
1323 message_error(_("%s: %s"),
1324 tuklib_mask_nonprint(pair->src_name),
1325 message_strm(ret));
1326 } else {
1327 message_warning(_("%s: %s"),
1328 tuklib_mask_nonprint(pair->src_name),
1329 message_strm(ret));
1330
1331 // When compressing, all possible errors set
1332 // stop to true.
1333 assert(opt_mode != MODE_COMPRESS);
1334 }
1335
1336 if (ret == LZMA_MEMLIMIT_ERROR) {
1337 // Display how much memory it would have
1338 // actually needed.
1339 message_mem_needed(V_ERROR,
1340 lzma_memusage(&strm));
1341 }
1342
1343 if (stop)
1344 break;
1345 }
1346
1347 // Show progress information under certain conditions.
1348 message_progress_update();
1349 }
1350
1351 return success;
1352 }
1353
1354
1355 /// Copy from input file to output file without processing the data in any
1356 /// way. This is used only when trying to decompress unrecognized files
1357 /// with --decompress --stdout --force, so the output is always stdout.
1358 static bool
coder_passthru(file_pair * pair)1359 coder_passthru(file_pair *pair)
1360 {
1361 while (strm.avail_in != 0) {
1362 if (user_abort)
1363 return false;
1364
1365 if (io_write(pair, &in_buf, strm.avail_in))
1366 return false;
1367
1368 strm.total_in += strm.avail_in;
1369 strm.total_out = strm.total_in;
1370 message_progress_update();
1371
1372 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1373 if (strm.avail_in == SIZE_MAX)
1374 return false;
1375 }
1376
1377 return true;
1378 }
1379
1380
1381 extern void
coder_run(const char * filename)1382 coder_run(const char *filename)
1383 {
1384 // Set and possibly print the filename for the progress message.
1385 message_filename(filename);
1386
1387 // Try to open the input file.
1388 file_pair *pair = io_open_src(filename);
1389 if (pair == NULL)
1390 return;
1391
1392 // Assume that something goes wrong.
1393 bool success = false;
1394
1395 if (opt_mode == MODE_COMPRESS) {
1396 strm.next_in = NULL;
1397 strm.avail_in = 0;
1398 } else {
1399 // Read the first chunk of input data. This is needed
1400 // to detect the input file type.
1401 strm.next_in = in_buf.u8;
1402 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1403 }
1404
1405 if (strm.avail_in != SIZE_MAX) {
1406 // Initialize the coder. This will detect the file format
1407 // and, in decompression or testing mode, check the memory
1408 // usage of the first Block too. This way we don't try to
1409 // open the destination file if we see that coding wouldn't
1410 // work at all anyway. This also avoids deleting the old
1411 // "target" file if --force was used.
1412 const enum coder_init_ret init_ret = coder_init(pair);
1413
1414 if (init_ret != CODER_INIT_ERROR && !user_abort) {
1415 // Don't open the destination file when --test
1416 // is used.
1417 if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
1418 // Remember the current time. It is needed
1419 // for progress indicator.
1420 mytime_set_start_time();
1421
1422 // Initialize the progress indicator.
1423 //
1424 // NOTE: When reading from stdin, fstat()
1425 // isn't called on it and thus src_st.st_size
1426 // is zero. If stdin pointed to a regular
1427 // file, it would still be possible to know
1428 // the file size but then we would also need
1429 // to take into account the current reading
1430 // position since with stdin it isn't
1431 // necessarily at the beginning of the file.
1432 const bool is_passthru = init_ret
1433 == CODER_INIT_PASSTHRU;
1434 const uint64_t in_size
1435 = pair->src_st.st_size <= 0
1436 ? 0 : (uint64_t)(pair->src_st.st_size);
1437 message_progress_start(&strm,
1438 is_passthru, in_size);
1439
1440 // Do the actual coding or passthru.
1441 if (is_passthru)
1442 success = coder_passthru(pair);
1443 else
1444 success = coder_normal(pair);
1445
1446 message_progress_end(success);
1447 }
1448 }
1449 }
1450
1451 // Close the file pair. It needs to know if coding was successful to
1452 // know if the source or target file should be unlinked.
1453 io_close(pair, success);
1454
1455 return;
1456 }
1457
1458
1459 #ifndef NDEBUG
1460 extern void
coder_free(void)1461 coder_free(void)
1462 {
1463 // Free starting from the second filter chain since the default
1464 // filter chain may have its options set from a static variable
1465 // in coder_set_compression_settings(). Since this is only run in
1466 // debug mode and will be freed when the process ends anyway, we
1467 // don't worry about freeing it.
1468 for (uint32_t i = 1; i < ARRAY_SIZE(chains); i++) {
1469 if (chains_used_mask & (1U << i))
1470 lzma_filters_free(chains[i], NULL);
1471 }
1472
1473 lzma_end(&strm);
1474 return;
1475 }
1476 #endif
1477