///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma_encoder.c
/// \brief      LZMA encoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "lzma2_encoder.h"
#include "lzma_encoder_private.h"
#include "fastpos.h"


/////////////
// Literal //
/////////////

/// \brief      Encode a literal byte using a match byte as extra context
///
/// Eight bits of `symbol` are encoded, most significant bit first. For each
/// bit, the corresponding bit of `match_byte` selects (through `offset`)
/// which part of `subcoder` is used. As soon as an encoded bit differs from
/// the corresponding match bit, `offset` becomes zero and stays zero, so the
/// remaining bits are indexed by `symbol >> 8` alone.
///
/// \param      rc          Range encoder that receives the bits
/// \param      subcoder    Probability array for this literal context
/// \param      match_byte  Byte to compare against (supplied by literal())
/// \param      symbol      The literal byte to encode
static inline void
literal_matched(lzma_range_encoder *rc, probability *subcoder,
		uint32_t match_byte, uint32_t symbol)
{
	// offset is either 0x100 (bits have matched so far) or 0.
	uint32_t offset = 0x100;

	// Set a marker bit above the byte; it ends the loop after it has
	// been shifted left eight times.
	symbol += UINT32_C(1) << 8;

	do {
		match_byte <<= 1;
		const uint32_t match_bit = match_byte & offset;
		const uint32_t subcoder_index
				= offset + match_bit + (symbol >> 8);
		const uint32_t bit = (symbol >> 7) & 1;
		rc_bit(rc, &subcoder[subcoder_index], bit);

		symbol <<= 1;

		// Clear offset if the bit just encoded (now bit 8 of symbol)
		// differs from the match bit (bit 8 of match_byte).
		offset &= ~(match_byte ^ symbol);

	} while (symbol < (UINT32_C(1) << 16));
}


/// \brief      Encode the byte at the current encoding position as a literal
///
/// The byte to encode is at mf->buffer[mf->read_pos - mf->read_ahead].
/// The probability set is chosen by literal_subcoder() from `position` and
/// the byte preceding the current one. The state is updated with
/// update_literal() afterwards.
///
/// \param      coder       LZMA encoder state
/// \param      mf          Match finder that holds the input buffer
/// \param      position    Position value passed on to literal_subcoder()
static inline void
literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
{
	// Locate the literal byte to be encoded and the subcoder.
	const uint8_t cur_byte = mf->buffer[
			mf->read_pos - mf->read_ahead];
	probability *subcoder = literal_subcoder(coder->literal,
			coder->literal_context_bits, coder->literal_pos_mask,
			position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);

	if (is_literal_state(coder->state)) {
		// Previous LZMA-symbol was a literal. Encode a normal
		// literal without a match byte.
		rc_bittree(&coder->rc, subcoder, 8, cur_byte);
	} else {
		// Previous LZMA-symbol was a match. Use the last byte of
		// the match as a "match byte". That is, compare the bits
		// of the current literal and the match byte.
		const uint8_t match_byte = mf->buffer[
				mf->read_pos - coder->reps[0] - 1
				- mf->read_ahead];
		literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
	}

	update_literal(coder->state);
}


//////////////////
// Match length //
//////////////////

/// \brief      Recalculate the length price table of one pos_state
///
/// Resets lc->counters[pos_state] to lc->table_size and fills
/// lc->prices[pos_state][0 .. table_size - 1]. Each price is the price of
/// the choice bit(s) plus the bittree price from the low, mid, or high
/// length coder, depending on which range the index falls into.
///
/// \param      lc          Length encoder whose prices are updated
/// \param      pos_state   Index of the price table to update
static void
length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
{
	const uint32_t table_size = lc->table_size;
	lc->counters[pos_state] = table_size;

	// a0: "low" chosen; b0: "mid" chosen; b1: "high" chosen.
	const uint32_t a0 = rc_bit_0_price(lc->choice);
	const uint32_t a1 = rc_bit_1_price(lc->choice);
	const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
	const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
	uint32_t *const prices = lc->prices[pos_state];

	// The same index i runs through all three loops; each loop
	// continues where the previous one stopped.
	uint32_t i;
	for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i)
		prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
				LEN_LOW_BITS, i);

	for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
		prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);

	for (; i < table_size; ++i)
		prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS,
				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);

	return;
}


/// \brief      Encode a match length
///
/// MATCH_LEN_MIN is subtracted from `len` and the result is encoded with
/// the low, mid, or high bittree, preceded by one or two choice bits.
/// When not in fast mode, the per-pos_state counter is decremented and the
/// price table is recalculated once the counter reaches zero.
///
/// \param      rc          Range encoder that receives the bits
/// \param      lc          Length encoder (probabilities, prices, counters)
/// \param      pos_state   Selects the low/mid trees and the price table
/// \param      len         Match length; asserted to be <= MATCH_LEN_MAX
/// \param      fast_mode   If true, price tables are not maintained
static inline void
length(lzma_range_encoder *rc, lzma_length_encoder *lc,
		const uint32_t pos_state, uint32_t len, const bool fast_mode)
{
	assert(len <= MATCH_LEN_MAX);
	len -= MATCH_LEN_MIN;

	if (len < LEN_LOW_SYMBOLS) {
		rc_bit(rc, &lc->choice, 0);
		rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len);
	} else {
		rc_bit(rc, &lc->choice, 1);
		len -= LEN_LOW_SYMBOLS;

		if (len < LEN_MID_SYMBOLS) {
			rc_bit(rc, &lc->choice2, 0);
			rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len);
		} else {
			rc_bit(rc, &lc->choice2, 1);
			len -= LEN_MID_SYMBOLS;
			rc_bittree(rc, lc->high, LEN_HIGH_BITS, len);
		}
	}

	// Only getoptimum uses the prices so don't update the table when
	// in fast mode.
	if (!fast_mode)
		if (--lc->counters[pos_state] == 0)
			length_update_prices(lc, pos_state);
}


///////////
// Match //
///////////

/// \brief      Encode a normal (non-repeated) match
///
/// Encodes the length, the distance slot, and, for slots at or above
/// DIST_MODEL_START, the remaining distance bits. Then `distance` is pushed
/// to the front of coder->reps[] (the oldest entry is dropped) and
/// coder->match_price_count is incremented.
///
/// The caller is expected to have encoded the is_match and is_rep bits
/// already (see encode_symbol() and encode_eopm()).
///
/// \param      coder       LZMA encoder state
/// \param      pos_state   Position state used by the length encoder
/// \param      distance    Value stored into reps[0]; encode_symbol()
///                         passes (back - REPS), encode_eopm() passes
///                         UINT32_MAX
/// \param      len         Match length
static inline void
match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
		const uint32_t distance, const uint32_t len)
{
	update_match(coder->state);

	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
			coder->fast_mode);

	const uint32_t dist_slot = get_dist_slot(distance);
	const uint32_t dist_state = get_dist_state(len);
	rc_bittree(&coder->rc, coder->dist_slot[dist_state],
			DIST_SLOT_BITS, dist_slot);

	if (dist_slot >= DIST_MODEL_START) {
		// The slot gives the two highest bits of the distance and
		// the number of lower bits (footer_bits) that follow.
		const uint32_t footer_bits = (dist_slot >> 1) - 1;
		const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
		const uint32_t dist_reduced = distance - base;

		if (dist_slot < DIST_MODEL_END) {
			// Careful here: base - dist_slot - 1 can be -1, but
			// rc_bittree_reverse starts at probs[1], not probs[0].
			rc_bittree_reverse(&coder->rc,
				coder->dist_special + base - dist_slot - 1,
				footer_bits, dist_reduced);
		} else {
			// High bits are sent with rc_direct(); the lowest
			// ALIGN_BITS bits use the dist_align probabilities.
			rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS,
					footer_bits - ALIGN_BITS);
			rc_bittree_reverse(
					&coder->rc, coder->dist_align,
					ALIGN_BITS, dist_reduced & ALIGN_MASK);
			++coder->align_price_count;
		}
	}

	coder->reps[3] = coder->reps[2];
	coder->reps[2] = coder->reps[1];
	coder->reps[1] = coder->reps[0];
	coder->reps[0] = distance;
	++coder->match_price_count;
}


////////////////////
// Repeated match //
////////////////////

/// \brief      Encode a repeated match
///
/// Encodes which of the entries of coder->reps[] is used, moves that entry
/// to reps[0] (shifting the entries before it by one), and then either
/// encodes the length (len != 1) or marks the symbol as a short rep
/// (len == 1).
///
/// The caller is expected to have encoded the is_match and is_rep bits
/// already (see encode_symbol()).
///
/// \param      coder       LZMA encoder state
/// \param      pos_state   Position state
/// \param      rep         Index into coder->reps[]; encode_symbol() only
///                         calls this with rep < REPS
/// \param      len         Match length; 1 means a short rep
static inline void
rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state,
		const uint32_t rep, const uint32_t len)
{
	if (rep == 0) {
		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0);
		rc_bit(&coder->rc,
				&coder->is_rep0_long[coder->state][pos_state],
				len != 1);
	} else {
		const uint32_t distance = coder->reps[rep];
		rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1);

		if (rep == 1) {
			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0);
		} else {
			rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1);
			rc_bit(&coder->rc, &coder->is_rep2[coder->state],
					rep - 2);

			if (rep == 3)
				coder->reps[3] = coder->reps[2];

			coder->reps[2] = coder->reps[1];
		}

		coder->reps[1] = coder->reps[0];
		coder->reps[0] = distance;
	}

	if (len == 1) {
		update_short_rep(coder->state);
	} else {
		length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
				coder->fast_mode);
		update_long_rep(coder->state);
	}
}


//////////
// Main //
//////////

/// \brief      Encode one LZMA symbol (literal, match, or repeated match)
///
/// \param      coder       LZMA encoder state
/// \param      mf          Match finder; mf->read_ahead is reduced by len
/// \param      back        UINT32_MAX for a literal, a value in [0, REPS)
///                         for a repeated match, otherwise a normal match
///                         whose distance value is (back - REPS)
/// \param      len         Number of input bytes covered by the symbol;
///                         asserted to be 1 for a literal
/// \param      position    Used to derive pos_state and passed to literal()
static void
encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf,
		uint32_t back, uint32_t len, uint32_t position)
{
	const uint32_t pos_state = position & coder->pos_mask;

	if (back == UINT32_MAX) {
		// Literal i.e. eight-bit byte
		assert(len == 1);
		rc_bit(&coder->rc,
				&coder->is_match[coder->state][pos_state], 0);
		literal(coder, mf, position);
	} else {
		// Some type of match
		rc_bit(&coder->rc,
			&coder->is_match[coder->state][pos_state], 1);

		if (back < REPS) {
			// It's a repeated match i.e. the same distance
			// has been used earlier.
			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
			rep_match(coder, pos_state, back, len);
		} else {
			// Normal match
			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
			match(coder, pos_state, back - REPS, len);
		}
	}

	assert(mf->read_ahead >= len);
	mf->read_ahead -= len;
}


/// \brief      Encode the first byte of the stream, if there is any input
///
/// \param      coder       LZMA encoder state; uncomp_size must be zero
/// \param      mf          Match finder; its position must be zero
///
/// \return     false if there is no input yet and mf->action is LZMA_RUN
///             (nothing was done and coder->is_initialized stays unset).
///             Otherwise true, with coder->is_initialized set; the first
///             byte has been encoded as a literal unless the input is
///             empty.
static bool
encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
{
	assert(mf_position(mf) == 0);
	assert(coder->uncomp_size == 0);

	if (mf->read_pos == mf->read_limit) {
		if (mf->action == LZMA_RUN)
			return false; // We cannot do anything.

		// We are finishing (we cannot get here when flushing).
		assert(mf->write_pos == mf->read_pos);
		assert(mf->action == LZMA_FINISH);
	} else {
		// Do the actual initialization. The first LZMA symbol must
		// always be a literal.
		mf_skip(mf, 1);
		mf->read_ahead = 0;
		rc_bit(&coder->rc, &coder->is_match[0][0], 0);
		rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
		++coder->uncomp_size;
	}

	// Initialization is done (except if empty file).
	coder->is_initialized = true;

	return true;
}


/// \brief      Encode the end marker (EOPM)
///
/// The marker is encoded as a normal match with distance value UINT32_MAX
/// and length MATCH_LEN_MIN.
///
/// \param      coder       LZMA encoder state
/// \param      position    Used to derive pos_state
static void
encode_eopm(lzma_lzma1_encoder *coder, uint32_t position)
{
	const uint32_t pos_state = position & coder->pos_mask;
	rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
	rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
	match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN);
}


/// Number of bytes that a single encoding loop in lzma_lzma_encode() can
/// consume from the dictionary. This limit comes from lzma_lzma_optimum()
/// and may need to be updated if that function is significantly modified.
#define LOOP_INPUT_MAX (OPTS + 1)


/// \brief      Main LZMA encoding loop, shared by LZMA1 and LZMA2
///
/// \param      coder       LZMA encoder state
/// \param      mf          Match finder providing the input
/// \param      out         Output buffer
/// \param      out_pos     Current position in the output buffer; updated
/// \param      out_size    Size of the output buffer
/// \param      limit       UINT32_MAX when no limit is wanted (this is what
///                         lzma_encode() passes for plain LZMA). With any
///                         other value, encoding stops when
///                         mf->read_pos - mf->read_ahead reaches `limit`
///                         or when the compressed data gets close to
///                         LZMA2_CHUNK_MAX.
///
/// \return     - LZMA_OK: More input or more output space is needed
///               (including the case where the final flush didn't fit
///               into the output buffer; call again to finish).
///             - LZMA_STREAM_END: The loop was left because of `limit`,
///               the output size limit, or the end of input, and all
///               range encoder output has been written.
extern lzma_ret
lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size, uint32_t limit)
{
	// Initialize the stream if no data has been encoded yet.
	if (!coder->is_initialized && !encode_init(coder, mf))
		return LZMA_OK;

	// Encode pending output bytes from the range encoder.
	// At the start of the stream, encode_init() encodes one literal.
	// Later there can be pending output only with LZMA1 because LZMA2
	// ensures that there is always enough output space. Thus when using
	// LZMA2, rc_encode() calls in this function will always return false.
	if (rc_encode(&coder->rc, out, out_pos, out_size)) {
		// We don't get here with LZMA2.
		assert(limit == UINT32_MAX);
		return LZMA_OK;
	}

	// If the range encoder was flushed in an earlier call to this
	// function but there wasn't enough output buffer space, those
	// bytes would have now been encoded by the above rc_encode() call
	// and the stream has now been finished. This can only happen with
	// LZMA1 as LZMA2 always provides enough output buffer space.
	if (coder->is_flushed) {
		assert(limit == UINT32_MAX);
		return LZMA_STREAM_END;
	}

	while (true) {
		// With LZMA2 we need to take care that compressed size of
		// a chunk doesn't get too big.
		// FIXME? Check if this could be improved.
		if (limit != UINT32_MAX
				&& (mf->read_pos - mf->read_ahead >= limit
					|| *out_pos + rc_pending(&coder->rc)
						>= LZMA2_CHUNK_MAX
							- LOOP_INPUT_MAX))
			break;

		// Check that there is some input to process.
		if (mf->read_pos >= mf->read_limit) {
			if (mf->action == LZMA_RUN)
				return LZMA_OK;

			// Finishing or flushing: stop only when everything
			// that has been read has also been encoded.
			if (mf->read_ahead == 0)
				break;
		}

		// Get optimal match (repeat position and length).
		// Value ranges for pos:
		//   - [0, REPS): repeated match
		//   - [REPS, UINT32_MAX):
		//     match at (pos - REPS)
		//   - UINT32_MAX: not a match but a literal
		// Value ranges for len:
		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
		uint32_t len;
		uint32_t back;

		if (coder->fast_mode)
			lzma_lzma_optimum_fast(coder, mf, &back, &len);
		else
			lzma_lzma_optimum_normal(coder, mf, &back, &len,
					(uint32_t)(coder->uncomp_size));

		encode_symbol(coder, mf, back, len,
				(uint32_t)(coder->uncomp_size));

		// If output size limiting is active (out_limit != 0), check
		// if encoding this LZMA symbol would make the output size
		// exceed the specified limit.
		if (coder->out_limit != 0 && rc_encode_dummy(
				&coder->rc, coder->out_limit)) {
			// The most recent LZMA symbol would make the output
			// too big. Throw it away.
			rc_forget(&coder->rc);

			// FIXME: Tell the LZ layer to not read more input as
			// it would be a waste of time. This doesn't matter if
			// output-size-limited encoding is done with a single
			// call though.

			break;
		}

		// This symbol will be encoded so update the uncompressed size.
		coder->uncomp_size += len;

		// Encode the LZMA symbol.
		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
			// Once again, this can only happen with LZMA1.
			assert(limit == UINT32_MAX);
			return LZMA_OK;
		}
	}

	// Make the uncompressed size available to the application.
	if (coder->uncomp_size_ptr != NULL)
		*coder->uncomp_size_ptr = coder->uncomp_size;

	// LZMA2 doesn't use EOPM at LZMA level.
	//
	// Plain LZMA streams without EOPM aren't supported except when
	// output size limiting is enabled.
	if (coder->use_eopm)
		encode_eopm(coder, (uint32_t)(coder->uncomp_size));

	// Flush the remaining bytes from the range encoder.
	rc_flush(&coder->rc);

	// Copy the remaining bytes to the output buffer. If there
	// isn't enough output space, we will copy out the remaining
	// bytes on the next call to this function.
	if (rc_encode(&coder->rc, out, out_pos, out_size)) {
		// This cannot happen with LZMA2.
		assert(limit == UINT32_MAX);

		coder->is_flushed = true;
		return LZMA_OK;
	}

	return LZMA_STREAM_END;
}


/// \brief      lzma_lz_encoder.code callback for plain LZMA
///
/// Calls lzma_lzma_encode() with limit == UINT32_MAX.
///
/// \return     LZMA_OPTIONS_ERROR if mf->action is LZMA_SYNC_FLUSH,
///             otherwise the return value of lzma_lzma_encode().
static lzma_ret
lzma_encode(void *coder, lzma_mf *restrict mf,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size)
{
	// Plain LZMA has no support for sync-flushing.
	if (unlikely(mf->action == LZMA_SYNC_FLUSH))
		return LZMA_OPTIONS_ERROR;

	return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX);
}


/// \brief      lzma_lz_encoder.set_out_limit callback
///
/// Enables output size limiting and disables the end marker.
///
/// \param      coder_ptr   Pointer to lzma_lzma1_encoder
/// \param      uncomp_size Address where lzma_lzma_encode() stores the
///                         uncompressed size after its main loop
/// \param      out_limit   Maximum output size in bytes
///
/// \return     LZMA_BUF_ERROR if out_limit < 6 (the coder is not modified
///             in that case), otherwise LZMA_OK.
static lzma_ret
lzma_lzma_set_out_limit(
		void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit)
{
	// Minimum output size is 5 bytes but that cannot hold any output
	// so we use 6 bytes.
	if (out_limit < 6)
		return LZMA_BUF_ERROR;

	lzma_lzma1_encoder *coder = coder_ptr;
	coder->out_limit = out_limit;
	coder->uncomp_size_ptr = uncomp_size;
	coder->use_eopm = false;
	return LZMA_OK;
}


////////////////////
// Initialization //
////////////////////

/// \brief      Check lc/lp/pb, nice_len, and mode of the options
///
/// \return     true if is_lclppb_valid() accepts the options, nice_len is
///             in the range [MATCH_LEN_MIN, MATCH_LEN_MAX], and mode is
///             LZMA_MODE_FAST or LZMA_MODE_NORMAL.
static bool
is_options_valid(const lzma_options_lzma *options)
{
	// Validate some of the options. LZ encoder validates nice_len too
	// but we need a valid value here earlier.
	return is_lclppb_valid(options)
			&& options->nice_len >= MATCH_LEN_MIN
			&& options->nice_len <= MATCH_LEN_MAX
			&& (options->mode == LZMA_MODE_FAST
				|| options->mode == LZMA_MODE_NORMAL);
}


/// \brief      Fill lzma_lz_options from lzma_options_lzma
///
/// nice_len is raised to at least mf_get_hash_bytes(options->mf).
static void
set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
{
	// LZ encoder initialization does the validation for these so we
	// don't need to validate here.
	lz_options->before_size = OPTS;
	lz_options->dict_size = options->dict_size;
	lz_options->after_size = LOOP_INPUT_MAX;
	lz_options->match_len_max = MATCH_LEN_MAX;
	lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf),
				options->nice_len);
	lz_options->match_finder = options->mf;
	lz_options->depth = options->depth;
	lz_options->preset_dict = options->preset_dict;
	lz_options->preset_dict_size = options->preset_dict_size;
	return;
}


/// \brief      Reset the probabilities of a length encoder
///
/// \param      lencoder        Length encoder to reset
/// \param      num_pos_states  Number of pos_states whose low/mid trees
///                             (and, in normal mode, price tables) are
///                             reset
/// \param      fast_mode       If false, the price tables are also
///                             calculated; lencoder->table_size must have
///                             been set before the call in that case
///                             because length_update_prices() reads it.
static void
length_encoder_reset(lzma_length_encoder *lencoder,
		const uint32_t num_pos_states, const bool fast_mode)
{
	bit_reset(lencoder->choice);
	bit_reset(lencoder->choice2);

	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
	}

	bittree_reset(lencoder->high, LEN_HIGH_BITS);

	if (!fast_mode)
		for (uint32_t pos_state = 0; pos_state < num_pos_states;
				++pos_state)
			length_update_prices(lencoder, pos_state);

	return;
}


/// \brief      Reset the LZMA encoder state for the given options
///
/// Resets the range coder, state, rep distances, all probabilities, the
/// length encoders, and the price counters. coder->fast_mode and the
/// length encoders' table_size are read here but not set; they are set in
/// lzma_lzma_encoder_create().
///
/// \return     LZMA_OPTIONS_ERROR if is_options_valid() rejects the
///             options (nothing is modified), otherwise LZMA_OK.
extern lzma_ret
lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
		const lzma_options_lzma *options)
{
	if (!is_options_valid(options))
		return LZMA_OPTIONS_ERROR;

	coder->pos_mask = (1U << options->pb) - 1;
	coder->literal_context_bits = options->lc;
	coder->literal_pos_mask = (1U << options->lp) - 1;

	// Range coder
	rc_reset(&coder->rc);

	// State
	coder->state = STATE_LIT_LIT;
	for (size_t i = 0; i < REPS; ++i)
		coder->reps[i] = 0;

	literal_init(coder->literal, options->lc, options->lp);

	// Bit encoders
	for (size_t i = 0; i < STATES; ++i) {
		for (size_t j = 0; j <= coder->pos_mask; ++j) {
			bit_reset(coder->is_match[i][j]);
			bit_reset(coder->is_rep0_long[i][j]);
		}

		bit_reset(coder->is_rep[i]);
		bit_reset(coder->is_rep0[i]);
		bit_reset(coder->is_rep1[i]);
		bit_reset(coder->is_rep2[i]);
	}

	for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
		bit_reset(coder->dist_special[i]);

	// Bit tree encoders
	for (size_t i = 0; i < DIST_STATES; ++i)
		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);

	bittree_reset(coder->dist_align, ALIGN_BITS);

	// Length encoders
	length_encoder_reset(&coder->match_len_encoder,
			1U << options->pb, coder->fast_mode);

	length_encoder_reset(&coder->rep_len_encoder,
			1U << options->pb, coder->fast_mode);

	// Price counts are incremented every time appropriate probabilities
	// are changed. Price counts are set to zero when the price tables
	// are updated, which is done when the appropriate price counts have
	// big enough value, and lzma_mf.read_ahead == 0 which happens at
	// least every OPTS (a few thousand) possible price count increments.
	//
	// By resetting price counts to UINT32_MAX / 2, we make sure that the
	// price tables will be initialized before they will be used (since
	// the value is definitely big enough), and that it is OK to increment
	// price counts without risk of integer overflow (since UINT32_MAX / 2
	// is small enough). The current code doesn't increment price counts
	// before initializing price tables, but it may be done in the future
	// if we add support for saving the state between LZMA2 chunks.
	coder->match_price_count = UINT32_MAX / 2;
	coder->align_price_count = UINT32_MAX / 2;

	coder->opts_end_index = 0;
	coder->opts_current_index = 0;

	return LZMA_OK;
}


/// \brief      Allocate (if needed) and initialize lzma_lzma1_encoder
///
/// \param      coder_ptr   Pointer to the coder pointer. If *coder_ptr is
///                         NULL, a new lzma_lzma1_encoder is allocated with
///                         lzma_alloc(); otherwise the existing one is
///                         reused.
/// \param      allocator   Allocator passed to lzma_alloc()
/// \param      id          LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT, or
///                         LZMA_FILTER_LZMA2 (asserted)
/// \param      options     Encoder options
/// \param      lz_options  Filled by set_lz_options()
///
/// \return     - LZMA_MEM_ERROR if the allocation fails
///             - LZMA_OPTIONS_ERROR if the mode is unknown, dict_size is
///               over 1.5 GiB in normal mode, unsupported ext_flags are
///               set with LZMA_FILTER_LZMA1EXT, or
///               lzma_lzma_encoder_reset() rejects the options
///             - LZMA_OK on success
///
/// \note       This function does not free *coder_ptr when it returns
///             an error after the allocation.
extern lzma_ret
lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator,
		lzma_vli id, const lzma_options_lzma *options,
		lzma_lz_options *lz_options)
{
	assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT
			|| id == LZMA_FILTER_LZMA2);

	// Allocate lzma_lzma1_encoder if it wasn't already allocated.
	if (*coder_ptr == NULL) {
		*coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator);
		if (*coder_ptr == NULL)
			return LZMA_MEM_ERROR;
	}

	lzma_lzma1_encoder *coder = *coder_ptr;

	// Set compression mode. Note that we haven't validated the options
	// yet. Invalid options will get rejected by lzma_lzma_encoder_reset()
	// call at the end of this function.
	switch (options->mode) {
		case LZMA_MODE_FAST:
			coder->fast_mode = true;
			break;

		case LZMA_MODE_NORMAL: {
			coder->fast_mode = false;

			// Set dist_table_size.
			// Round the dictionary size up to next 2^n.
			//
			// Currently the maximum encoder dictionary size
			// is 1.5 GiB due to lz_encoder.c and here we need
			// to be below 2 GiB to make the rounded up value
			// fit in an uint32_t and avoid an infinite while-loop
			// (and undefined behavior due to a too large shift).
			// So do the same check as in LZ encoder,
			// limiting to 1.5 GiB.
			if (options->dict_size > (UINT32_C(1) << 30)
					+ (UINT32_C(1) << 29))
				return LZMA_OPTIONS_ERROR;

			uint32_t log_size = 0;
			while ((UINT32_C(1) << log_size) < options->dict_size)
				++log_size;

			coder->dist_table_size = log_size * 2;

			// Length encoders' price table size
			const uint32_t nice_len = my_max(
					mf_get_hash_bytes(options->mf),
					options->nice_len);

			coder->match_len_encoder.table_size
					= nice_len + 1 - MATCH_LEN_MIN;
			coder->rep_len_encoder.table_size
					= nice_len + 1 - MATCH_LEN_MIN;
			break;
		}

		default:
			return LZMA_OPTIONS_ERROR;
	}

	// We don't need to write the first byte as literal if there is
	// a non-empty preset dictionary. encode_init() wouldn't even work
	// if there is a non-empty preset dictionary, because encode_init()
	// assumes that position is zero and previous byte is also zero.
	coder->is_initialized = options->preset_dict != NULL
			&& options->preset_dict_size > 0;
	coder->is_flushed = false;
	coder->uncomp_size = 0;
	coder->uncomp_size_ptr = NULL;

	// Output size limiting is disabled by default.
	coder->out_limit = 0;

	// Determine if end marker is wanted:
	//   - It is never used with LZMA2.
	//   - It is always used with LZMA_FILTER_LZMA1 (unless
	//     lzma_lzma_set_out_limit() is called later).
	//   - LZMA_FILTER_LZMA1EXT has a flag for it in the options.
	coder->use_eopm = (id == LZMA_FILTER_LZMA1);
	if (id == LZMA_FILTER_LZMA1EXT) {
		// Check if unsupported flags are present.
		if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
			return LZMA_OPTIONS_ERROR;

		coder->use_eopm = (options->ext_flags
				& LZMA_LZMA1EXT_ALLOW_EOPM) != 0;

		// TODO? As long as there are no filters that change the size
		// of the data, it is enough to look at lzma_stream.total_in
		// after encoding has been finished to know the uncompressed
		// size of the LZMA1 stream. But in the future there could be
		// filters that change the size of the data and then total_in
		// doesn't work as the LZMA1 stream size might be different
		// due to another filter in the chain. The problem is simple
		// to solve: Add another flag to ext_flags and then set
		// coder->uncomp_size_ptr to the address stored in
		// lzma_options_lzma.reserved_ptr2 (or _ptr1).
	}

	set_lz_options(lz_options, options);

	return lzma_lzma_encoder_reset(coder, options);
}


/// \brief      LZ encoder initialization callback for plain LZMA
///
/// Installs the code and set_out_limit callbacks into `lz` and creates
/// the coder with lzma_lzma_encoder_create().
static lzma_ret
lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
		lzma_vli id, const void *options, lzma_lz_options *lz_options)
{
	lz->code = &lzma_encode;
	lz->set_out_limit = &lzma_lzma_set_out_limit;
	return lzma_lzma_encoder_create(
			&lz->coder, allocator, id, options, lz_options);
}


/// \brief      Initialize the LZMA encoder as part of a filter chain
///
/// Delegates to lzma_lz_encoder_init() with lzma_encoder_init() as the
/// LZ-based encoder's initialization callback.
extern lzma_ret
lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	return lzma_lz_encoder_init(
			next, allocator, filters, &lzma_encoder_init);
}


/// \brief      Calculate the memory usage of the LZMA encoder
///
/// \param      options     Pointer to lzma_options_lzma
///
/// \return     sizeof(lzma_lzma1_encoder) plus the LZ encoder's memory
///             usage, or UINT64_MAX if the options are invalid or
///             lzma_lz_encoder_memusage() returns UINT64_MAX.
extern uint64_t
lzma_lzma_encoder_memusage(const void *options)
{
	if (!is_options_valid(options))
		return UINT64_MAX;

	lzma_lz_options lz_options;
	set_lz_options(&lz_options, options);

	const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
	if (lz_memusage == UINT64_MAX)
		return UINT64_MAX;

	return (uint64_t)(sizeof(lzma_lzma1_encoder)) + lz_memusage;
}


/// \brief      Encode lc, lp, and pb into a single byte
///
/// The byte is calculated as (pb * 5 + lp) * 9 + lc.
///
/// \param      options     Options from which lc, lp, and pb are read
/// \param      byte        Destination for the encoded byte; it is not
///                         written if the options are invalid
///
/// \return     Note the inverted convention: true on error
///             (is_lclppb_valid() rejected the options), false on success.
extern bool
lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
{
	if (!is_lclppb_valid(options))
		return true;

	*byte = (options->pb * 5 + options->lp) * 9 + options->lc;
	assert(*byte <= (4 * 5 + 4) * 9 + 8);

	return false;
}


#ifdef HAVE_ENCODER_LZMA1
/// \brief      Encode the LZMA1 filter properties
///
/// Writes five bytes to `out`: the lc/lp/pb byte from
/// lzma_lzma_lclppb_encode() followed by dict_size written with
/// write32le().
///
/// \param      options     Pointer to lzma_options_lzma
/// \param      out         Destination buffer; five bytes are written
///
/// \return     LZMA_PROG_ERROR if options is NULL or lc/lp/pb are invalid,
///             otherwise LZMA_OK.
extern lzma_ret
lzma_lzma_props_encode(const void *options, uint8_t *out)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	const lzma_options_lzma *const opt = options;

	if (lzma_lzma_lclppb_encode(opt, out))
		return LZMA_PROG_ERROR;

	write32le(out + 1, opt->dict_size);

	return LZMA_OK;
}
#endif


/// \brief      Test if the given compression mode is supported
///
/// \return     true for LZMA_MODE_FAST and LZMA_MODE_NORMAL, false for
///             any other value.
extern LZMA_API(lzma_bool)
lzma_mode_is_supported(lzma_mode mode)
{
	return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL;
}