1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file lzma_encoder.c 4 /// \brief LZMA encoder 5 /// 6 // Authors: Igor Pavlov 7 // Lasse Collin 8 // 9 // This file has been put into the public domain. 10 // You can do whatever you want with this file. 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "lzma2_encoder.h" 15 #include "lzma_encoder_private.h" 16 #include "fastpos.h" 17 18 19 ///////////// 20 // Literal // 21 ///////////// 22 23 static inline void 24 literal_matched(lzma_range_encoder *rc, probability *subcoder, 25 uint32_t match_byte, uint32_t symbol) 26 { 27 uint32_t offset = 0x100; 28 symbol += UINT32_C(1) << 8; 29 30 do { 31 match_byte <<= 1; 32 const uint32_t match_bit = match_byte & offset; 33 const uint32_t subcoder_index 34 = offset + match_bit + (symbol >> 8); 35 const uint32_t bit = (symbol >> 7) & 1; 36 rc_bit(rc, &subcoder[subcoder_index], bit); 37 38 symbol <<= 1; 39 offset &= ~(match_byte ^ symbol); 40 41 } while (symbol < (UINT32_C(1) << 16)); 42 } 43 44 45 static inline void 46 literal(lzma_coder *coder, lzma_mf *mf, uint32_t position) 47 { 48 // Locate the literal byte to be encoded and the subcoder. 49 const uint8_t cur_byte = mf->buffer[ 50 mf->read_pos - mf->read_ahead]; 51 probability *subcoder = literal_subcoder(coder->literal, 52 coder->literal_context_bits, coder->literal_pos_mask, 53 position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); 54 55 if (is_literal_state(coder->state)) { 56 // Previous LZMA-symbol was a literal. Encode a normal 57 // literal without a match byte. 58 rc_bittree(&coder->rc, subcoder, 8, cur_byte); 59 } else { 60 // Previous LZMA-symbol was a match. Use the last byte of 61 // the match as a "match byte". That is, compare the bits 62 // of the current literal and the match byte. 63 const uint8_t match_byte = mf->buffer[ 64 mf->read_pos - coder->reps[0] - 1 65 - mf->read_ahead]; 66 literal_matched(&coder->rc, subcoder, match_byte, cur_byte); 67 } 68 69 update_literal(coder->state); 70 } 71 72 73 ////////////////// 74 // Match length // 75 ////////////////// 76 77 static void 78 length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) 79 { 80 const uint32_t table_size = lc->table_size; 81 lc->counters[pos_state] = table_size; 82 83 const uint32_t a0 = rc_bit_0_price(lc->choice); 84 const uint32_t a1 = rc_bit_1_price(lc->choice); 85 const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); 86 const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); 87 uint32_t *const prices = lc->prices[pos_state]; 88 89 uint32_t i; 90 for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) 91 prices[i] = a0 + rc_bittree_price(lc->low[pos_state], 92 LEN_LOW_BITS, i); 93 94 for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) 95 prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], 96 LEN_MID_BITS, i - LEN_LOW_SYMBOLS); 97 98 for (; i < table_size; ++i) 99 prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, 100 i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); 101 102 return; 103 } 104 105 106 static inline void 107 length(lzma_range_encoder *rc, lzma_length_encoder *lc, 108 const uint32_t pos_state, uint32_t len, const bool fast_mode) 109 { 110 assert(len <= MATCH_LEN_MAX); 111 len -= MATCH_LEN_MIN; 112 113 if (len < LEN_LOW_SYMBOLS) { 114 rc_bit(rc, &lc->choice, 0); 115 rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); 116 } else { 117 rc_bit(rc, &lc->choice, 1); 118 len -= LEN_LOW_SYMBOLS; 119 120 if (len < LEN_MID_SYMBOLS) { 121 rc_bit(rc, &lc->choice2, 0); 122 rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); 123 } else { 124 rc_bit(rc, &lc->choice2, 1); 125 len -= LEN_MID_SYMBOLS; 126 rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); 127 } 128 } 129 130 // Only getoptimum uses the prices so don't update the table when 131 // in fast mode. 132 if (!fast_mode) 133 if (--lc->counters[pos_state] == 0) 134 length_update_prices(lc, pos_state); 135 } 136 137 138 /////////// 139 // Match // 140 /////////// 141 142 static inline void 143 match(lzma_coder *coder, const uint32_t pos_state, 144 const uint32_t distance, const uint32_t len) 145 { 146 update_match(coder->state); 147 148 length(&coder->rc, &coder->match_len_encoder, pos_state, len, 149 coder->fast_mode); 150 151 const uint32_t dist_slot = get_dist_slot(distance); 152 const uint32_t dist_state = get_dist_state(len); 153 rc_bittree(&coder->rc, coder->dist_slot[dist_state], 154 DIST_SLOT_BITS, dist_slot); 155 156 if (dist_slot >= DIST_MODEL_START) { 157 const uint32_t footer_bits = (dist_slot >> 1) - 1; 158 const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; 159 const uint32_t dist_reduced = distance - base; 160 161 if (dist_slot < DIST_MODEL_END) { 162 // Careful here: base - dist_slot - 1 can be -1, but 163 // rc_bittree_reverse starts at probs[1], not probs[0]. 164 rc_bittree_reverse(&coder->rc, 165 coder->dist_special + base - dist_slot - 1, 166 footer_bits, dist_reduced); 167 } else { 168 rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS, 169 footer_bits - ALIGN_BITS); 170 rc_bittree_reverse( 171 &coder->rc, coder->dist_align, 172 ALIGN_BITS, dist_reduced & ALIGN_MASK); 173 ++coder->align_price_count; 174 } 175 } 176 177 coder->reps[3] = coder->reps[2]; 178 coder->reps[2] = coder->reps[1]; 179 coder->reps[1] = coder->reps[0]; 180 coder->reps[0] = distance; 181 ++coder->match_price_count; 182 } 183 184 185 //////////////////// 186 // Repeated match // 187 //////////////////// 188 189 static inline void 190 rep_match(lzma_coder *coder, const uint32_t pos_state, 191 const uint32_t rep, const uint32_t len) 192 { 193 if (rep == 0) { 194 rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); 195 rc_bit(&coder->rc, 196 &coder->is_rep0_long[coder->state][pos_state], 197 len != 1); 198 } else { 199 const uint32_t distance = coder->reps[rep]; 200 rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); 201 202 if (rep == 1) { 203 rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); 204 } else { 205 rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); 206 rc_bit(&coder->rc, &coder->is_rep2[coder->state], 207 rep - 2); 208 209 if (rep == 3) 210 coder->reps[3] = coder->reps[2]; 211 212 coder->reps[2] = coder->reps[1]; 213 } 214 215 coder->reps[1] = coder->reps[0]; 216 coder->reps[0] = distance; 217 } 218 219 if (len == 1) { 220 update_short_rep(coder->state); 221 } else { 222 length(&coder->rc, &coder->rep_len_encoder, pos_state, len, 223 coder->fast_mode); 224 update_long_rep(coder->state); 225 } 226 } 227 228 229 ////////// 230 // Main // 231 ////////// 232 233 static void 234 encode_symbol(lzma_coder *coder, lzma_mf *mf, 235 uint32_t back, uint32_t len, uint32_t position) 236 { 237 const uint32_t pos_state = position & coder->pos_mask; 238 239 if (back == UINT32_MAX) { 240 // Literal i.e. eight-bit byte 241 assert(len == 1); 242 rc_bit(&coder->rc, 243 &coder->is_match[coder->state][pos_state], 0); 244 literal(coder, mf, position); 245 } else { 246 // Some type of match 247 rc_bit(&coder->rc, 248 &coder->is_match[coder->state][pos_state], 1); 249 250 if (back < REPS) { 251 // It's a repeated match i.e. the same distance 252 // has been used earlier. 253 rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); 254 rep_match(coder, pos_state, back, len); 255 } else { 256 // Normal match 257 rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 258 match(coder, pos_state, back - REPS, len); 259 } 260 } 261 262 assert(mf->read_ahead >= len); 263 mf->read_ahead -= len; 264 } 265 266 267 static bool 268 encode_init(lzma_coder *coder, lzma_mf *mf) 269 { 270 assert(mf_position(mf) == 0); 271 272 if (mf->read_pos == mf->read_limit) { 273 if (mf->action == LZMA_RUN) 274 return false; // We cannot do anything. 275 276 // We are finishing (we cannot get here when flushing). 277 assert(mf->write_pos == mf->read_pos); 278 assert(mf->action == LZMA_FINISH); 279 } else { 280 // Do the actual initialization. The first LZMA symbol must 281 // always be a literal. 282 mf_skip(mf, 1); 283 mf->read_ahead = 0; 284 rc_bit(&coder->rc, &coder->is_match[0][0], 0); 285 rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); 286 } 287 288 // Initialization is done (except if empty file). 289 coder->is_initialized = true; 290 291 return true; 292 } 293 294 295 static void 296 encode_eopm(lzma_coder *coder, uint32_t position) 297 { 298 const uint32_t pos_state = position & coder->pos_mask; 299 rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); 300 rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); 301 match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); 302 } 303 304 305 /// Number of bytes that a single encoding loop in lzma_lzma_encode() can 306 /// consume from the dictionary. This limit comes from lzma_lzma_optimum() 307 /// and may need to be updated if that function is significantly modified. 308 #define LOOP_INPUT_MAX (OPTS + 1) 309 310 311 extern lzma_ret 312 lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 313 uint8_t *restrict out, size_t *restrict out_pos, 314 size_t out_size, uint32_t limit) 315 { 316 // Initialize the stream if no data has been encoded yet. 317 if (!coder->is_initialized && !encode_init(coder, mf)) 318 return LZMA_OK; 319 320 // Get the lowest bits of the uncompressed offset from the LZ layer. 321 uint32_t position = mf_position(mf); 322 323 while (true) { 324 // Encode pending bits, if any. Calling this before encoding 325 // the next symbol is needed only with plain LZMA, since 326 // LZMA2 always provides big enough buffer to flush 327 // everything out from the range encoder. For the same reason, 328 // rc_encode() never returns true when this function is used 329 // as part of LZMA2 encoder. 330 if (rc_encode(&coder->rc, out, out_pos, out_size)) { 331 assert(limit == UINT32_MAX); 332 return LZMA_OK; 333 } 334 335 // With LZMA2 we need to take care that compressed size of 336 // a chunk doesn't get too big. 337 // FIXME? Check if this could be improved. 338 if (limit != UINT32_MAX 339 && (mf->read_pos - mf->read_ahead >= limit 340 || *out_pos + rc_pending(&coder->rc) 341 >= LZMA2_CHUNK_MAX 342 - LOOP_INPUT_MAX)) 343 break; 344 345 // Check that there is some input to process. 346 if (mf->read_pos >= mf->read_limit) { 347 if (mf->action == LZMA_RUN) 348 return LZMA_OK; 349 350 if (mf->read_ahead == 0) 351 break; 352 } 353 354 // Get optimal match (repeat position and length). 355 // Value ranges for pos: 356 // - [0, REPS): repeated match 357 // - [REPS, UINT32_MAX): 358 // match at (pos - REPS) 359 // - UINT32_MAX: not a match but a literal 360 // Value ranges for len: 361 // - [MATCH_LEN_MIN, MATCH_LEN_MAX] 362 uint32_t len; 363 uint32_t back; 364 365 if (coder->fast_mode) 366 lzma_lzma_optimum_fast(coder, mf, &back, &len); 367 else 368 lzma_lzma_optimum_normal( 369 coder, mf, &back, &len, position); 370 371 encode_symbol(coder, mf, back, len, position); 372 373 position += len; 374 } 375 376 if (!coder->is_flushed) { 377 coder->is_flushed = true; 378 379 // We don't support encoding plain LZMA streams without EOPM, 380 // and LZMA2 doesn't use EOPM at LZMA level. 381 if (limit == UINT32_MAX) 382 encode_eopm(coder, position); 383 384 // Flush the remaining bytes from the range encoder. 385 rc_flush(&coder->rc); 386 387 // Copy the remaining bytes to the output buffer. If there 388 // isn't enough output space, we will copy out the remaining 389 // bytes on the next call to this function by using 390 // the rc_encode() call in the encoding loop above. 391 if (rc_encode(&coder->rc, out, out_pos, out_size)) { 392 assert(limit == UINT32_MAX); 393 return LZMA_OK; 394 } 395 } 396 397 // Make it ready for the next LZMA2 chunk. 398 coder->is_flushed = false; 399 400 return LZMA_STREAM_END; 401 } 402 403 404 static lzma_ret 405 lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, 406 uint8_t *restrict out, size_t *restrict out_pos, 407 size_t out_size) 408 { 409 // Plain LZMA has no support for sync-flushing. 410 if (unlikely(mf->action == LZMA_SYNC_FLUSH)) 411 return LZMA_OPTIONS_ERROR; 412 413 return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); 414 } 415 416 417 //////////////////// 418 // Initialization // 419 //////////////////// 420 421 static bool 422 is_options_valid(const lzma_options_lzma *options) 423 { 424 // Validate some of the options. LZ encoder validates nice_len too 425 // but we need a valid value here earlier. 426 return is_lclppb_valid(options) 427 && options->nice_len >= MATCH_LEN_MIN 428 && options->nice_len <= MATCH_LEN_MAX 429 && (options->mode == LZMA_MODE_FAST 430 || options->mode == LZMA_MODE_NORMAL); 431 } 432 433 434 static void 435 set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) 436 { 437 // LZ encoder initialization does the validation for these so we 438 // don't need to validate here. 439 lz_options->before_size = OPTS; 440 lz_options->dict_size = options->dict_size; 441 lz_options->after_size = LOOP_INPUT_MAX; 442 lz_options->match_len_max = MATCH_LEN_MAX; 443 lz_options->nice_len = options->nice_len; 444 lz_options->match_finder = options->mf; 445 lz_options->depth = options->depth; 446 lz_options->preset_dict = options->preset_dict; 447 lz_options->preset_dict_size = options->preset_dict_size; 448 return; 449 } 450 451 452 static void 453 length_encoder_reset(lzma_length_encoder *lencoder, 454 const uint32_t num_pos_states, const bool fast_mode) 455 { 456 bit_reset(lencoder->choice); 457 bit_reset(lencoder->choice2); 458 459 for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { 460 bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); 461 bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); 462 } 463 464 bittree_reset(lencoder->high, LEN_HIGH_BITS); 465 466 if (!fast_mode) 467 for (uint32_t pos_state = 0; pos_state < num_pos_states; 468 ++pos_state) 469 length_update_prices(lencoder, pos_state); 470 471 return; 472 } 473 474 475 extern lzma_ret 476 lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) 477 { 478 if (!is_options_valid(options)) 479 return LZMA_OPTIONS_ERROR; 480 481 coder->pos_mask = (1U << options->pb) - 1; 482 coder->literal_context_bits = options->lc; 483 coder->literal_pos_mask = (1U << options->lp) - 1; 484 485 // Range coder 486 rc_reset(&coder->rc); 487 488 // State 489 coder->state = STATE_LIT_LIT; 490 for (size_t i = 0; i < REPS; ++i) 491 coder->reps[i] = 0; 492 493 literal_init(coder->literal, options->lc, options->lp); 494 495 // Bit encoders 496 for (size_t i = 0; i < STATES; ++i) { 497 for (size_t j = 0; j <= coder->pos_mask; ++j) { 498 bit_reset(coder->is_match[i][j]); 499 bit_reset(coder->is_rep0_long[i][j]); 500 } 501 502 bit_reset(coder->is_rep[i]); 503 bit_reset(coder->is_rep0[i]); 504 bit_reset(coder->is_rep1[i]); 505 bit_reset(coder->is_rep2[i]); 506 } 507 508 for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) 509 bit_reset(coder->dist_special[i]); 510 511 // Bit tree encoders 512 for (size_t i = 0; i < DIST_STATES; ++i) 513 bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); 514 515 bittree_reset(coder->dist_align, ALIGN_BITS); 516 517 // Length encoders 518 length_encoder_reset(&coder->match_len_encoder, 519 1U << options->pb, coder->fast_mode); 520 521 length_encoder_reset(&coder->rep_len_encoder, 522 1U << options->pb, coder->fast_mode); 523 524 // Price counts are incremented every time appropriate probabilities 525 // are changed. price counts are set to zero when the price tables 526 // are updated, which is done when the appropriate price counts have 527 // big enough value, and lzma_mf.read_ahead == 0 which happens at 528 // least every OPTS (a few thousand) possible price count increments. 529 // 530 // By resetting price counts to UINT32_MAX / 2, we make sure that the 531 // price tables will be initialized before they will be used (since 532 // the value is definitely big enough), and that it is OK to increment 533 // price counts without risk of integer overflow (since UINT32_MAX / 2 534 // is small enough). The current code doesn't increment price counts 535 // before initializing price tables, but it maybe done in future if 536 // we add support for saving the state between LZMA2 chunks. 537 coder->match_price_count = UINT32_MAX / 2; 538 coder->align_price_count = UINT32_MAX / 2; 539 540 coder->opts_end_index = 0; 541 coder->opts_current_index = 0; 542 543 return LZMA_OK; 544 } 545 546 547 extern lzma_ret 548 lzma_lzma_encoder_create(lzma_coder **coder_ptr, 549 const lzma_allocator *allocator, 550 const lzma_options_lzma *options, lzma_lz_options *lz_options) 551 { 552 // Allocate lzma_coder if it wasn't already allocated. 553 if (*coder_ptr == NULL) { 554 *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); 555 if (*coder_ptr == NULL) 556 return LZMA_MEM_ERROR; 557 } 558 559 lzma_coder *coder = *coder_ptr; 560 561 // Set compression mode. We haven't validates the options yet, 562 // but it's OK here, since nothing bad happens with invalid 563 // options in the code below, and they will get rejected by 564 // lzma_lzma_encoder_reset() call at the end of this function. 565 switch (options->mode) { 566 case LZMA_MODE_FAST: 567 coder->fast_mode = true; 568 break; 569 570 case LZMA_MODE_NORMAL: { 571 coder->fast_mode = false; 572 573 // Set dist_table_size. 574 // Round the dictionary size up to next 2^n. 575 uint32_t log_size = 0; 576 while ((UINT32_C(1) << log_size) < options->dict_size) 577 ++log_size; 578 579 coder->dist_table_size = log_size * 2; 580 581 // Length encoders' price table size 582 coder->match_len_encoder.table_size 583 = options->nice_len + 1 - MATCH_LEN_MIN; 584 coder->rep_len_encoder.table_size 585 = options->nice_len + 1 - MATCH_LEN_MIN; 586 break; 587 } 588 589 default: 590 return LZMA_OPTIONS_ERROR; 591 } 592 593 // We don't need to write the first byte as literal if there is 594 // a non-empty preset dictionary. encode_init() wouldn't even work 595 // if there is a non-empty preset dictionary, because encode_init() 596 // assumes that position is zero and previous byte is also zero. 597 coder->is_initialized = options->preset_dict != NULL 598 && options->preset_dict_size > 0; 599 coder->is_flushed = false; 600 601 set_lz_options(lz_options, options); 602 603 return lzma_lzma_encoder_reset(coder, options); 604 } 605 606 607 static lzma_ret 608 lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, 609 const void *options, lzma_lz_options *lz_options) 610 { 611 lz->code = &lzma_encode; 612 return lzma_lzma_encoder_create( 613 &lz->coder, allocator, options, lz_options); 614 } 615 616 617 extern lzma_ret 618 lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 619 const lzma_filter_info *filters) 620 { 621 return lzma_lz_encoder_init( 622 next, allocator, filters, &lzma_encoder_init); 623 } 624 625 626 extern uint64_t 627 lzma_lzma_encoder_memusage(const void *options) 628 { 629 if (!is_options_valid(options)) 630 return UINT64_MAX; 631 632 lzma_lz_options lz_options; 633 set_lz_options(&lz_options, options); 634 635 const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); 636 if (lz_memusage == UINT64_MAX) 637 return UINT64_MAX; 638 639 return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; 640 } 641 642 643 extern bool 644 lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) 645 { 646 if (!is_lclppb_valid(options)) 647 return true; 648 649 *byte = (options->pb * 5 + options->lp) * 9 + options->lc; 650 assert(*byte <= (4 * 5 + 4) * 9 + 8); 651 652 return false; 653 } 654 655 656 #ifdef HAVE_ENCODER_LZMA1 657 extern lzma_ret 658 lzma_lzma_props_encode(const void *options, uint8_t *out) 659 { 660 const lzma_options_lzma *const opt = options; 661 662 if (lzma_lzma_lclppb_encode(opt, out)) 663 return LZMA_PROG_ERROR; 664 665 unaligned_write32le(out + 1, opt->dict_size); 666 667 return LZMA_OK; 668 } 669 #endif 670 671 672 extern LZMA_API(lzma_bool) 673 lzma_mode_is_supported(lzma_mode mode) 674 { 675 return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; 676 } 677