// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_encoder.c
/// \brief      LZ in window
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#include "lz_encoder.h"
#include "lz_encoder_hash.h"

// See lz_encoder_hash.h. This is a bit hackish but avoids making
// endianness a conditional in makefiles.
#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
#	include "lz_encoder_hash_table.h"
#endif

#include "memcmplen.h"


typedef struct {
	/// LZ-based encoder e.g. LZMA
	lzma_lz_encoder lz;

	/// History buffer and match finder
	lzma_mf mf;

	/// Next coder in the chain
	lzma_next_coder next;
} lzma_coder;


/// \brief      Moves the data in the input window to free space for new data
///
/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
/// bytes of input history available all the time. Now and then we need to
/// "slide" the buffer to make room for new data at the end of the buffer.
/// At the same time, data older than keep_size_before is dropped.
///
static void
move_window(lzma_mf *mf)
{
	// Align the move to a multiple of 16 bytes. Some LZ-based encoders
	// like LZMA use the lowest bits of mf->read_pos to know the
	// alignment of the uncompressed data. We also get better speed
	// for memmove() with aligned buffers.
	assert(mf->read_pos > mf->keep_size_before);
	const uint32_t move_offset
		= (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
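	// To illustrate the math above with made-up example values: if
	// read_pos = 1000 and keep_size_before = 900, the difference is
	// 100, and clearing the low four bits gives move_offset = 96.
	// Rounding the offset *down* keeps the move 16-byte aligned
	// without ever dropping any of the required history bytes.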
	assert(mf->write_pos > move_offset);
	const size_t move_size = mf->write_pos - move_offset;

	assert(move_offset + move_size <= mf->size);

	memmove(mf->buffer, mf->buffer + move_offset, move_size);

	mf->offset += move_offset;
	mf->read_pos -= move_offset;
	mf->read_limit -= move_offset;
	mf->write_pos -= move_offset;

	return;
}


/// \brief      Tries to fill the input window (mf->buffer)
///
/// If we are the last encoder in the chain, our input data is in in[].
/// Otherwise we call the next filter in the chain to process in[] and
/// write its output to mf->buffer.
///
/// This function must not be called once it has returned LZMA_STREAM_END.
///
static lzma_ret
fill_window(lzma_coder *coder, const lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos, size_t in_size,
		lzma_action action)
{
	assert(coder->mf.read_pos <= coder->mf.write_pos);

	// Move the sliding window if needed.
	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
		move_window(&coder->mf);

	// Maybe this is ugly: lzma_mf uses uint32_t for most things
	// (which I find cleanest), but we need size_t here when filling
	// the history window.
	size_t write_pos = coder->mf.write_pos;
	lzma_ret ret;
	if (coder->next.code == NULL) {
		// Not using a filter, simply memcpy() as much as possible.
		lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
				&write_pos, coder->mf.size);

		ret = action != LZMA_RUN && *in_pos == in_size
				? LZMA_STREAM_END : LZMA_OK;

	} else {
		ret = coder->next.code(coder->next.coder, allocator,
				in, in_pos, in_size,
				coder->mf.buffer, &write_pos,
				coder->mf.size, action);
	}

	coder->mf.write_pos = write_pos;

	// Silence Valgrind. lzma_memcmplen() can read extra bytes
	// and Valgrind will give warnings if those bytes are uninitialized
	// because Valgrind cannot see that the values of the uninitialized
	// bytes are eventually ignored.
	memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);

	// If end of stream has been reached or flushing completed, we allow
	// the encoder to process all the input (that is, read_pos is allowed
	// to reach write_pos). Otherwise we keep keep_size_after bytes
	// available as prebuffer.
	if (ret == LZMA_STREAM_END) {
		assert(*in_pos == in_size);
		ret = LZMA_OK;
		coder->mf.action = action;
		coder->mf.read_limit = coder->mf.write_pos;

	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
		// This needs to be done conditionally, because if we got
		// only a little new input, there may be too little input
		// to do any encoding yet.
		coder->mf.read_limit = coder->mf.write_pos
				- coder->mf.keep_size_after;
	}

	// Restart the match finder after a finished LZMA_SYNC_FLUSH.
	if (coder->mf.pending > 0
			&& coder->mf.read_pos < coder->mf.read_limit) {
		// The match finder may update coder->mf.pending and expects
		// it to start from zero, so use a temporary variable.
		const uint32_t pending = coder->mf.pending;
		coder->mf.pending = 0;

		// Rewind read_pos so that the match finder can hash
		// the pending bytes.
		assert(coder->mf.read_pos >= pending);
		coder->mf.read_pos -= pending;

		// Call the skip function directly instead of using
		// mf_skip(), since we don't want to touch mf->read_ahead.
		coder->mf.skip(&coder->mf, pending);
	}

	return ret;
}


static lzma_ret
lz_encode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size, lzma_action action)
{
	lzma_coder *coder = coder_ptr;

	while (*out_pos < out_size
			&& (*in_pos < in_size || action != LZMA_RUN)) {
		// Read more data to coder->mf.buffer if needed.
		if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
				>= coder->mf.read_limit)
			return_if_error(fill_window(coder, allocator,
					in, in_pos, in_size, action));

		// Encode
		const lzma_ret ret = coder->lz.code(coder->lz.coder,
				&coder->mf, out, out_pos, out_size);
		if (ret != LZMA_OK) {
			// Setting this to LZMA_RUN matters for cases when
			// we are flushing. It doesn't matter when finishing
			// or if an error occurred.
			coder->mf.action = LZMA_RUN;
			return ret;
		}
	}

	return LZMA_OK;
}


static bool
lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// For now, the dictionary size is limited to 1.5 GiB. This may grow
	// in the future if needed, but it needs a little more work than just
	// changing this check.
	if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)
			|| lz_options->nice_len > lz_options->match_len_max)
		return true;

	mf->keep_size_before = lz_options->before_size + lz_options->dict_size;

	mf->keep_size_after = lz_options->after_size
			+ lz_options->match_len_max;

	// To avoid constant memmove()s, allocate some extra space. Since
	// memmove()s become more expensive when the size of the buffer
	// increases, we reserve more space when a large dictionary is
	// used to make the memmove() calls rarer.
	//
	// This works with dictionaries up to about 3 GiB. If a bigger
	// dictionary is wanted, some extra work is needed:
	//  - Several variables in lzma_mf have to be changed from uint32_t
	//    to size_t.
	//  - Memory usage calculation needs something too, e.g. use uint64_t
	//    for mf->size.
	uint32_t reserve = lz_options->dict_size / 2;
	if (reserve > (UINT32_C(1) << 30))
		reserve /= 2;

	reserve += (lz_options->before_size + lz_options->match_len_max
			+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);
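	// An illustrative example of the sizing above: with a 64 MiB
	// dictionary, reserve starts at 32 MiB (the halving branch is not
	// taken since 32 MiB < 1 GiB), and 512 KiB plus half of the
	// before/after/match-length overhead is added on top, so mf->size
	// ends up at roughly 1.5 times the dictionary size.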
	const uint32_t old_size = mf->size;
	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;

	// Deallocate the old history buffer if it exists but has different
	// size than what is needed now.
	if (mf->buffer != NULL && old_size != mf->size) {
		lzma_free(mf->buffer, allocator);
		mf->buffer = NULL;
	}

	// Match finder options
	mf->match_len_max = lz_options->match_len_max;
	mf->nice_len = lz_options->nice_len;

	// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
	// mean limiting dictionary size to less than 2 GiB. With a match
	// finder that uses multibyte resolution (hashes start at e.g. every
	// fourth byte), cyclic_size would stay below 2 Gi even when the
	// dictionary size is greater than 2 GiB.
	//
	// It would be possible to allow cyclic_size >= 2 Gi, but then we
	// would need to be careful to use 64-bit types in various places
	// (size_t could do since we would need a bigger than 32-bit address
	// space anyway). It would also require either zeroing a multigigabyte
	// buffer at initialization (waste of time and RAM) or allowing
	// normalization in lz_encoder_mf.c to access uninitialized
	// memory to keep the code simpler. The current way is simple and
	// still allows pretty big dictionaries, so I don't expect these
	// limits to change.
	mf->cyclic_size = lz_options->dict_size + 1;

	// Validate the match finder ID and setup the function pointers.
	switch (lz_options->match_finder) {
#ifdef HAVE_MF_HC3
	case LZMA_MF_HC3:
		mf->find = &lzma_mf_hc3_find;
		mf->skip = &lzma_mf_hc3_skip;
		break;
#endif
#ifdef HAVE_MF_HC4
	case LZMA_MF_HC4:
		mf->find = &lzma_mf_hc4_find;
		mf->skip = &lzma_mf_hc4_skip;
		break;
#endif
#ifdef HAVE_MF_BT2
	case LZMA_MF_BT2:
		mf->find = &lzma_mf_bt2_find;
		mf->skip = &lzma_mf_bt2_skip;
		break;
#endif
#ifdef HAVE_MF_BT3
	case LZMA_MF_BT3:
		mf->find = &lzma_mf_bt3_find;
		mf->skip = &lzma_mf_bt3_skip;
		break;
#endif
#ifdef HAVE_MF_BT4
	case LZMA_MF_BT4:
		mf->find = &lzma_mf_bt4_find;
		mf->skip = &lzma_mf_bt4_skip;
		break;
#endif

	default:
		return true;
	}

	// Calculate the sizes of mf->hash and mf->son.
	//
	// NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len
	// is big enough for the selected match finder. This makes it
	// easier for applications as nice_len = 2 will always be accepted
	// even though the effective value can be slightly bigger.
	const uint32_t hash_bytes
			= mf_get_hash_bytes(lz_options->match_finder);
	assert(hash_bytes <= mf->nice_len);

	const bool is_bt = (lz_options->match_finder & 0x10) != 0;
	uint32_t hs;

	if (hash_bytes == 2) {
		hs = 0xFFFF;
	} else {
		// Round dictionary size up to the next 2^n - 1 so it can
		// be used as a hash mask.
		hs = lz_options->dict_size - 1;
		hs |= hs >> 1;
		hs |= hs >> 2;
		hs |= hs >> 4;
		hs |= hs >> 8;
		hs >>= 1;
		hs |= 0xFFFF;
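		// A note on the bit tricks above: the shift cascade stops
		// at ">> 8", so by itself it would smear the highest set
		// bit down only 15 positions. Together with the ">>= 1"
		// and "|= 0xFFFF" steps this still sets every bit below
		// the highest one for all allowed dictionary sizes, and
		// the "|= 0xFFFF" also guarantees a minimum hash table
		// size of 64 Ki entries.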

		if (hs > (UINT32_C(1) << 24)) {
			if (hash_bytes == 3)
				hs = (UINT32_C(1) << 24) - 1;
			else
				hs >>= 1;
		}
	}

	mf->hash_mask = hs;

	++hs;
	if (hash_bytes > 2)
		hs += HASH_2_SIZE;
	if (hash_bytes > 3)
		hs += HASH_3_SIZE;
	/*
	No match finder uses this at the moment.
	if (mf->hash_bytes > 4)
		hs += HASH_4_SIZE;
	*/

	const uint32_t old_hash_count = mf->hash_count;
	const uint32_t old_sons_count = mf->sons_count;
	mf->hash_count = hs;
	mf->sons_count = mf->cyclic_size;
	if (is_bt)
		mf->sons_count *= 2;

	// Deallocate the old hash array if it exists and has different size
	// than what is needed now.
	if (old_hash_count != mf->hash_count
			|| old_sons_count != mf->sons_count) {
		lzma_free(mf->hash, allocator);
		mf->hash = NULL;

		lzma_free(mf->son, allocator);
		mf->son = NULL;
	}

	// Maximum number of match finder cycles
	mf->depth = lz_options->depth;
	if (mf->depth == 0) {
		if (is_bt)
			mf->depth = 16 + mf->nice_len / 2;
		else
			mf->depth = 4 + mf->nice_len / 4;
	}
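	// For example, with nice_len = 64 these defaults give
	// depth = 16 + 64 / 2 = 48 for the binary tree match finders
	// and depth = 4 + 64 / 4 = 20 for the hash chains.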

	return false;
}


static bool
lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// Allocate the history buffer.
	if (mf->buffer == NULL) {
		// lzma_memcmplen() is used for the dictionary buffer
		// so we need to allocate a few extra bytes to prevent
		// it from reading past the end of the buffer.
		mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
				allocator);
		if (mf->buffer == NULL)
			return true;

		// Keep Valgrind happy with lzma_memcmplen() and initialize
		// the extra bytes whose value may get read but which will
		// effectively get ignored.
		memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
	}

	// Use cyclic_size as initial mf->offset. This allows
	// avoiding a few branches in the match finders. The downside is
	// that the match finder needs to be normalized more often, which
	// may hurt performance with huge dictionaries.
	mf->offset = mf->cyclic_size;
	mf->read_pos = 0;
	mf->read_ahead = 0;
	mf->read_limit = 0;
	mf->write_pos = 0;
	mf->pending = 0;

#if UINT32_MAX >= SIZE_MAX / 4
	// Check for integer overflow. (Huge dictionaries are not
	// possible on 32-bit CPUs.)
	if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
			|| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
		return true;
#endif

	// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
	// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
	//
	// We don't need to initialize mf->son, but not doing that may
	// make Valgrind complain in normalization (see normalize() in
	// lz_encoder_mf.c). Skipping the initialization is *very* good
	// when a big dictionary is used but only a small amount of data
	// actually gets compressed: most of mf->son won't actually get
	// allocated by the kernel, so we avoid wasting RAM and improve
	// initialization speed a lot.
	if (mf->hash == NULL) {
		mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
				allocator);
		mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
				allocator);

		if (mf->hash == NULL || mf->son == NULL) {
			lzma_free(mf->hash, allocator);
			mf->hash = NULL;

			lzma_free(mf->son, allocator);
			mf->son = NULL;

			return true;
		}
	} else {
		/*
		for (uint32_t i = 0; i < mf->hash_count; ++i)
			mf->hash[i] = EMPTY_HASH_VALUE;
		*/
		memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
	}

	mf->cyclic_pos = 0;

	// Handle preset dictionary.
	if (lz_options->preset_dict != NULL
			&& lz_options->preset_dict_size > 0) {
		// If the preset dictionary is bigger than the actual
		// dictionary, use only the tail.
		mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
		memcpy(mf->buffer, lz_options->preset_dict
				+ lz_options->preset_dict_size - mf->write_pos,
				mf->write_pos);
		mf->action = LZMA_SYNC_FLUSH;
		mf->skip(mf, mf->write_pos);
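		// The skip call above runs the preset dictionary through
		// the match finder so that its positions get hashed and
		// become available as match sources, without producing
		// any encoder output.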
	}

	mf->action = LZMA_RUN;

	return false;
}


extern uint64_t
lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
{
	// Old buffers must not exist when calling lz_encoder_prepare().
	lzma_mf mf = {
		.buffer = NULL,
		.hash = NULL,
		.son = NULL,
		.hash_count = 0,
		.sons_count = 0,
	};

	// Setup the size information into mf.
	if (lz_encoder_prepare(&mf, NULL, lz_options))
		return UINT64_MAX;

	// Calculate the memory usage.
	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
			+ mf.size + sizeof(lzma_coder);
}


static void
lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_coder *coder = coder_ptr;

	lzma_next_end(&coder->next, allocator);

	lzma_free(coder->mf.son, allocator);
	lzma_free(coder->mf.hash, allocator);
	lzma_free(coder->mf.buffer, allocator);

	if (coder->lz.end != NULL)
		coder->lz.end(coder->lz.coder, allocator);
	else
		lzma_free(coder->lz.coder, allocator);

	lzma_free(coder, allocator);
	return;
}


static lzma_ret
lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
		const lzma_filter *filters_null lzma_attribute((__unused__)),
		const lzma_filter *reversed_filters)
{
	lzma_coder *coder = coder_ptr;

	if (coder->lz.options_update == NULL)
		return LZMA_PROG_ERROR;

	return_if_error(coder->lz.options_update(
			coder->lz.coder, reversed_filters));

	return lzma_next_filter_update(
			&coder->next, allocator, reversed_filters + 1);
}


static lzma_ret
lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size,
		uint64_t out_limit)
{
	lzma_coder *coder = coder_ptr;

	// This is supported only if there are no other filters chained.
	if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)
		return coder->lz.set_out_limit(
				coder->lz.coder, uncomp_size, out_limit);

	return LZMA_OPTIONS_ERROR;
}


extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
		const lzma_filter_info *filters,
		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
			const lzma_allocator *allocator,
			lzma_vli id, const void *options,
			lzma_lz_options *lz_options))
{
#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
	// The CRC32 table must be initialized.
	lzma_crc32_init();
#endif

	// Allocate and initialize the base data structure.
	lzma_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;
		next->code = &lz_encode;
		next->end = &lz_encoder_end;
		next->update = &lz_encoder_update;
		next->set_out_limit = &lz_encoder_set_out_limit;

		coder->lz.coder = NULL;
		coder->lz.code = NULL;
		coder->lz.end = NULL;
		coder->lz.options_update = NULL;
		coder->lz.set_out_limit = NULL;

		// mf.size is initialized to silence Valgrind
		// when used on optimized binaries (GCC may reorder
		// code in a way that Valgrind gets unhappy).
		coder->mf.buffer = NULL;
		coder->mf.size = 0;
		coder->mf.hash = NULL;
		coder->mf.son = NULL;
		coder->mf.hash_count = 0;
		coder->mf.sons_count = 0;

		coder->next = LZMA_NEXT_CODER_INIT;
	}

	// Initialize the LZ-based encoder.
	lzma_lz_options lz_options;
	return_if_error(lz_init(&coder->lz, allocator,
			filters[0].id, filters[0].options, &lz_options));

	// Setup the size information into coder->mf and deallocate
	// old buffers if they have the wrong size.
	if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
		return LZMA_OPTIONS_ERROR;

	// Allocate new buffers if needed, and do the rest of
	// the initialization.
	if (lz_encoder_init(&coder->mf, allocator, &lz_options))
		return LZMA_MEM_ERROR;

	// Initialize the next filter in the chain, if any.
	return lzma_next_filter_init(&coder->next, allocator, filters + 1);
}


extern LZMA_API(lzma_bool)
lzma_mf_is_supported(lzma_match_finder mf)
{
	switch (mf) {
#ifdef HAVE_MF_HC3
	case LZMA_MF_HC3:
		return true;
#endif
#ifdef HAVE_MF_HC4
	case LZMA_MF_HC4:
		return true;
#endif
#ifdef HAVE_MF_BT2
	case LZMA_MF_BT2:
		return true;
#endif
#ifdef HAVE_MF_BT3
	case LZMA_MF_BT3:
		return true;
#endif
#ifdef HAVE_MF_BT4
	case LZMA_MF_BT4:
		return true;
#endif
	default:
		return false;
	}
}