1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file lz_encoder.c 4 /// \brief LZ in window 5 /// 6 // Authors: Igor Pavlov 7 // Lasse Collin 8 // 9 // This file has been put into the public domain. 10 // You can do whatever you want with this file. 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "lz_encoder.h" 15 #include "lz_encoder_hash.h" 16 17 // See lz_encoder_hash.h. This is a bit hackish but avoids making 18 // endianness a conditional in makefiles. 19 #if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) 20 # include "lz_encoder_hash_table.h" 21 #endif 22 23 #include "memcmplen.h" 24 25 26 struct lzma_coder_s { 27 /// LZ-based encoder e.g. LZMA 28 lzma_lz_encoder lz; 29 30 /// History buffer and match finder 31 lzma_mf mf; 32 33 /// Next coder in the chain 34 lzma_next_coder next; 35 }; 36 37 38 /// \brief Moves the data in the input window to free space for new data 39 /// 40 /// mf->buffer is a sliding input window, which keeps mf->keep_size_before 41 /// bytes of input history available all the time. Now and then we need to 42 /// "slide" the buffer to make space for the new data to the end of the 43 /// buffer. At the same time, data older than keep_size_before is dropped. 44 /// 45 static void 46 move_window(lzma_mf *mf) 47 { 48 // Align the move to a multiple of 16 bytes. Some LZ-based encoders 49 // like LZMA use the lowest bits of mf->read_pos to know the 50 // alignment of the uncompressed data. We also get better speed 51 // for memmove() with aligned buffers. 52 assert(mf->read_pos > mf->keep_size_before); 53 const uint32_t move_offset 54 = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); 55 56 assert(mf->write_pos > move_offset); 57 const size_t move_size = mf->write_pos - move_offset; 58 59 assert(move_offset + move_size <= mf->size); 60 61 memmove(mf->buffer, mf->buffer + move_offset, move_size); 62 63 mf->offset += move_offset; 64 mf->read_pos -= move_offset; 65 mf->read_limit -= move_offset; 66 mf->write_pos -= move_offset; 67 68 return; 69 } 70 71 72 /// \brief Tries to fill the input window (mf->buffer) 73 /// 74 /// If we are the last encoder in the chain, our input data is in in[]. 75 /// Otherwise we call the next filter in the chain to process in[] and 76 /// write its output to mf->buffer. 77 /// 78 /// This function must not be called once it has returned LZMA_STREAM_END. 79 /// 80 static lzma_ret 81 fill_window(lzma_coder *coder, const lzma_allocator *allocator, 82 const uint8_t *in, size_t *in_pos, size_t in_size, 83 lzma_action action) 84 { 85 assert(coder->mf.read_pos <= coder->mf.write_pos); 86 87 // Move the sliding window if needed. 88 if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) 89 move_window(&coder->mf); 90 91 // Maybe this is ugly, but lzma_mf uses uint32_t for most things 92 // (which I find cleanest), but we need size_t here when filling 93 // the history window. 94 size_t write_pos = coder->mf.write_pos; 95 lzma_ret ret; 96 if (coder->next.code == NULL) { 97 // Not using a filter, simply memcpy() as much as possible. 98 lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, 99 &write_pos, coder->mf.size); 100 101 ret = action != LZMA_RUN && *in_pos == in_size 102 ? LZMA_STREAM_END : LZMA_OK; 103 104 } else { 105 ret = coder->next.code(coder->next.coder, allocator, 106 in, in_pos, in_size, 107 coder->mf.buffer, &write_pos, 108 coder->mf.size, action); 109 } 110 111 coder->mf.write_pos = write_pos; 112 113 // If end of stream has been reached or flushing completed, we allow 114 // the encoder to process all the input (that is, read_pos is allowed 115 // to reach write_pos). Otherwise we keep keep_size_after bytes 116 // available as prebuffer. 117 if (ret == LZMA_STREAM_END) { 118 assert(*in_pos == in_size); 119 ret = LZMA_OK; 120 coder->mf.action = action; 121 coder->mf.read_limit = coder->mf.write_pos; 122 123 } else if (coder->mf.write_pos > coder->mf.keep_size_after) { 124 // This needs to be done conditionally, because if we got 125 // only little new input, there may be too little input 126 // to do any encoding yet. 127 coder->mf.read_limit = coder->mf.write_pos 128 - coder->mf.keep_size_after; 129 } 130 131 // Restart the match finder after finished LZMA_SYNC_FLUSH. 132 if (coder->mf.pending > 0 133 && coder->mf.read_pos < coder->mf.read_limit) { 134 // Match finder may update coder->pending and expects it to 135 // start from zero, so use a temporary variable. 136 const size_t pending = coder->mf.pending; 137 coder->mf.pending = 0; 138 139 // Rewind read_pos so that the match finder can hash 140 // the pending bytes. 141 assert(coder->mf.read_pos >= pending); 142 coder->mf.read_pos -= pending; 143 144 // Call the skip function directly instead of using 145 // mf_skip(), since we don't want to touch mf->read_ahead. 146 coder->mf.skip(&coder->mf, pending); 147 } 148 149 return ret; 150 } 151 152 153 static lzma_ret 154 lz_encode(lzma_coder *coder, const lzma_allocator *allocator, 155 const uint8_t *restrict in, size_t *restrict in_pos, 156 size_t in_size, 157 uint8_t *restrict out, size_t *restrict out_pos, 158 size_t out_size, lzma_action action) 159 { 160 while (*out_pos < out_size 161 && (*in_pos < in_size || action != LZMA_RUN)) { 162 // Read more data to coder->mf.buffer if needed. 163 if (coder->mf.action == LZMA_RUN && coder->mf.read_pos 164 >= coder->mf.read_limit) 165 return_if_error(fill_window(coder, allocator, 166 in, in_pos, in_size, action)); 167 168 // Encode 169 const lzma_ret ret = coder->lz.code(coder->lz.coder, 170 &coder->mf, out, out_pos, out_size); 171 if (ret != LZMA_OK) { 172 // Setting this to LZMA_RUN for cases when we are 173 // flushing. It doesn't matter when finishing or if 174 // an error occurred. 175 coder->mf.action = LZMA_RUN; 176 return ret; 177 } 178 } 179 180 return LZMA_OK; 181 } 182 183 184 static bool 185 lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, 186 const lzma_lz_options *lz_options) 187 { 188 // For now, the dictionary size is limited to 1.5 GiB. This may grow 189 // in the future if needed, but it needs a little more work than just 190 // changing this check. 191 if (lz_options->dict_size < LZMA_DICT_SIZE_MIN 192 || lz_options->dict_size 193 > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) 194 || lz_options->nice_len > lz_options->match_len_max) 195 return true; 196 197 mf->keep_size_before = lz_options->before_size + lz_options->dict_size; 198 199 mf->keep_size_after = lz_options->after_size 200 + lz_options->match_len_max; 201 202 // To avoid constant memmove()s, allocate some extra space. Since 203 // memmove()s become more expensive when the size of the buffer 204 // increases, we reserve more space when a large dictionary is 205 // used to make the memmove() calls rarer. 206 // 207 // This works with dictionaries up to about 3 GiB. If bigger 208 // dictionary is wanted, some extra work is needed: 209 // - Several variables in lzma_mf have to be changed from uint32_t 210 // to size_t. 211 // - Memory usage calculation needs something too, e.g. use uint64_t 212 // for mf->size. 213 uint32_t reserve = lz_options->dict_size / 2; 214 if (reserve > (UINT32_C(1) << 30)) 215 reserve /= 2; 216 217 reserve += (lz_options->before_size + lz_options->match_len_max 218 + lz_options->after_size) / 2 + (UINT32_C(1) << 19); 219 220 const uint32_t old_size = mf->size; 221 mf->size = mf->keep_size_before + reserve + mf->keep_size_after; 222 223 // Deallocate the old history buffer if it exists but has different 224 // size than what is needed now. 225 if (mf->buffer != NULL && old_size != mf->size) { 226 lzma_free(mf->buffer, allocator); 227 mf->buffer = NULL; 228 } 229 230 // Match finder options 231 mf->match_len_max = lz_options->match_len_max; 232 mf->nice_len = lz_options->nice_len; 233 234 // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't 235 // mean limiting dictionary size to less than 2 GiB. With a match 236 // finder that uses multibyte resolution (hashes start at e.g. every 237 // fourth byte), cyclic_size would stay below 2 Gi even when 238 // dictionary size is greater than 2 GiB. 239 // 240 // It would be possible to allow cyclic_size >= 2 Gi, but then we 241 // would need to be careful to use 64-bit types in various places 242 // (size_t could do since we would need bigger than 32-bit address 243 // space anyway). It would also require either zeroing a multigigabyte 244 // buffer at initialization (waste of time and RAM) or allow 245 // normalization in lz_encoder_mf.c to access uninitialized 246 // memory to keep the code simpler. The current way is simple and 247 // still allows pretty big dictionaries, so I don't expect these 248 // limits to change. 249 mf->cyclic_size = lz_options->dict_size + 1; 250 251 // Validate the match finder ID and setup the function pointers. 252 switch (lz_options->match_finder) { 253 #ifdef HAVE_MF_HC3 254 case LZMA_MF_HC3: 255 mf->find = &lzma_mf_hc3_find; 256 mf->skip = &lzma_mf_hc3_skip; 257 break; 258 #endif 259 #ifdef HAVE_MF_HC4 260 case LZMA_MF_HC4: 261 mf->find = &lzma_mf_hc4_find; 262 mf->skip = &lzma_mf_hc4_skip; 263 break; 264 #endif 265 #ifdef HAVE_MF_BT2 266 case LZMA_MF_BT2: 267 mf->find = &lzma_mf_bt2_find; 268 mf->skip = &lzma_mf_bt2_skip; 269 break; 270 #endif 271 #ifdef HAVE_MF_BT3 272 case LZMA_MF_BT3: 273 mf->find = &lzma_mf_bt3_find; 274 mf->skip = &lzma_mf_bt3_skip; 275 break; 276 #endif 277 #ifdef HAVE_MF_BT4 278 case LZMA_MF_BT4: 279 mf->find = &lzma_mf_bt4_find; 280 mf->skip = &lzma_mf_bt4_skip; 281 break; 282 #endif 283 284 default: 285 return true; 286 } 287 288 // Calculate the sizes of mf->hash and mf->son and check that 289 // nice_len is big enough for the selected match finder. 290 const uint32_t hash_bytes = lz_options->match_finder & 0x0F; 291 if (hash_bytes > mf->nice_len) 292 return true; 293 294 const bool is_bt = (lz_options->match_finder & 0x10) != 0; 295 uint32_t hs; 296 297 if (hash_bytes == 2) { 298 hs = 0xFFFF; 299 } else { 300 // Round dictionary size up to the next 2^n - 1 so it can 301 // be used as a hash mask. 302 hs = lz_options->dict_size - 1; 303 hs |= hs >> 1; 304 hs |= hs >> 2; 305 hs |= hs >> 4; 306 hs |= hs >> 8; 307 hs >>= 1; 308 hs |= 0xFFFF; 309 310 if (hs > (UINT32_C(1) << 24)) { 311 if (hash_bytes == 3) 312 hs = (UINT32_C(1) << 24) - 1; 313 else 314 hs >>= 1; 315 } 316 } 317 318 mf->hash_mask = hs; 319 320 ++hs; 321 if (hash_bytes > 2) 322 hs += HASH_2_SIZE; 323 if (hash_bytes > 3) 324 hs += HASH_3_SIZE; 325 /* 326 No match finder uses this at the moment. 327 if (mf->hash_bytes > 4) 328 hs += HASH_4_SIZE; 329 */ 330 331 const uint32_t old_hash_count = mf->hash_count; 332 const uint32_t old_sons_count = mf->sons_count; 333 mf->hash_count = hs; 334 mf->sons_count = mf->cyclic_size; 335 if (is_bt) 336 mf->sons_count *= 2; 337 338 // Deallocate the old hash array if it exists and has different size 339 // than what is needed now. 340 if (old_hash_count != mf->hash_count 341 || old_sons_count != mf->sons_count) { 342 lzma_free(mf->hash, allocator); 343 mf->hash = NULL; 344 345 lzma_free(mf->son, allocator); 346 mf->son = NULL; 347 } 348 349 // Maximum number of match finder cycles 350 mf->depth = lz_options->depth; 351 if (mf->depth == 0) { 352 if (is_bt) 353 mf->depth = 16 + mf->nice_len / 2; 354 else 355 mf->depth = 4 + mf->nice_len / 4; 356 } 357 358 return false; 359 } 360 361 362 static bool 363 lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator, 364 const lzma_lz_options *lz_options) 365 { 366 // Allocate the history buffer. 367 if (mf->buffer == NULL) { 368 // lzma_memcmplen() is used for the dictionary buffer 369 // so we need to allocate a few extra bytes to prevent 370 // it from reading past the end of the buffer. 371 mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA, 372 allocator); 373 if (mf->buffer == NULL) 374 return true; 375 376 // Keep Valgrind happy with lzma_memcmplen() and initialize 377 // the extra bytes whose value may get read but which will 378 // effectively get ignored. 379 memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA); 380 } 381 382 // Use cyclic_size as initial mf->offset. This allows 383 // avoiding a few branches in the match finders. The downside is 384 // that match finder needs to be normalized more often, which may 385 // hurt performance with huge dictionaries. 386 mf->offset = mf->cyclic_size; 387 mf->read_pos = 0; 388 mf->read_ahead = 0; 389 mf->read_limit = 0; 390 mf->write_pos = 0; 391 mf->pending = 0; 392 393 #if UINT32_MAX >= SIZE_MAX / 4 394 // Check for integer overflow. (Huge dictionaries are not 395 // possible on 32-bit CPU.) 396 if (mf->hash_count > SIZE_MAX / sizeof(uint32_t) 397 || mf->sons_count > SIZE_MAX / sizeof(uint32_t)) 398 return true; 399 #endif 400 401 // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE 402 // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash. 403 // 404 // We don't need to initialize mf->son, but not doing that may 405 // make Valgrind complain in normalization (see normalize() in 406 // lz_encoder_mf.c). Skipping the initialization is *very* good 407 // when big dictionary is used but only small amount of data gets 408 // actually compressed: most of the mf->son won't get actually 409 // allocated by the kernel, so we avoid wasting RAM and improve 410 // initialization speed a lot. 411 if (mf->hash == NULL) { 412 mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t), 413 allocator); 414 mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t), 415 allocator); 416 417 if (mf->hash == NULL || mf->son == NULL) { 418 lzma_free(mf->hash, allocator); 419 mf->hash = NULL; 420 421 lzma_free(mf->son, allocator); 422 mf->son = NULL; 423 424 return true; 425 } 426 } else { 427 /* 428 for (uint32_t i = 0; i < mf->hash_count; ++i) 429 mf->hash[i] = EMPTY_HASH_VALUE; 430 */ 431 memzero(mf->hash, mf->hash_count * sizeof(uint32_t)); 432 } 433 434 mf->cyclic_pos = 0; 435 436 // Handle preset dictionary. 437 if (lz_options->preset_dict != NULL 438 && lz_options->preset_dict_size > 0) { 439 // If the preset dictionary is bigger than the actual 440 // dictionary, use only the tail. 441 mf->write_pos = my_min(lz_options->preset_dict_size, mf->size); 442 memcpy(mf->buffer, lz_options->preset_dict 443 + lz_options->preset_dict_size - mf->write_pos, 444 mf->write_pos); 445 mf->action = LZMA_SYNC_FLUSH; 446 mf->skip(mf, mf->write_pos); 447 } 448 449 mf->action = LZMA_RUN; 450 451 return false; 452 } 453 454 455 extern uint64_t 456 lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) 457 { 458 // Old buffers must not exist when calling lz_encoder_prepare(). 459 lzma_mf mf = { 460 .buffer = NULL, 461 .hash = NULL, 462 .son = NULL, 463 .hash_count = 0, 464 .sons_count = 0, 465 }; 466 467 // Setup the size information into mf. 468 if (lz_encoder_prepare(&mf, NULL, lz_options)) 469 return UINT64_MAX; 470 471 // Calculate the memory usage. 472 return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t) 473 + mf.size + sizeof(lzma_coder); 474 } 475 476 477 static void 478 lz_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) 479 { 480 lzma_next_end(&coder->next, allocator); 481 482 lzma_free(coder->mf.son, allocator); 483 lzma_free(coder->mf.hash, allocator); 484 lzma_free(coder->mf.buffer, allocator); 485 486 if (coder->lz.end != NULL) 487 coder->lz.end(coder->lz.coder, allocator); 488 else 489 lzma_free(coder->lz.coder, allocator); 490 491 lzma_free(coder, allocator); 492 return; 493 } 494 495 496 static lzma_ret 497 lz_encoder_update(lzma_coder *coder, const lzma_allocator *allocator, 498 const lzma_filter *filters_null lzma_attribute((__unused__)), 499 const lzma_filter *reversed_filters) 500 { 501 if (coder->lz.options_update == NULL) 502 return LZMA_PROG_ERROR; 503 504 return_if_error(coder->lz.options_update( 505 coder->lz.coder, reversed_filters)); 506 507 return lzma_next_filter_update( 508 &coder->next, allocator, reversed_filters + 1); 509 } 510 511 512 extern lzma_ret 513 lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 514 const lzma_filter_info *filters, 515 lzma_ret (*lz_init)(lzma_lz_encoder *lz, 516 const lzma_allocator *allocator, const void *options, 517 lzma_lz_options *lz_options)) 518 { 519 #ifdef HAVE_SMALL 520 // We need that the CRC32 table has been initialized. 521 lzma_crc32_init(); 522 #endif 523 524 // Allocate and initialize the base data structure. 525 if (next->coder == NULL) { 526 next->coder = lzma_alloc(sizeof(lzma_coder), allocator); 527 if (next->coder == NULL) 528 return LZMA_MEM_ERROR; 529 530 next->code = &lz_encode; 531 next->end = &lz_encoder_end; 532 next->update = &lz_encoder_update; 533 534 next->coder->lz.coder = NULL; 535 next->coder->lz.code = NULL; 536 next->coder->lz.end = NULL; 537 538 next->coder->mf.buffer = NULL; 539 next->coder->mf.hash = NULL; 540 next->coder->mf.son = NULL; 541 next->coder->mf.hash_count = 0; 542 next->coder->mf.sons_count = 0; 543 544 next->coder->next = LZMA_NEXT_CODER_INIT; 545 } 546 547 // Initialize the LZ-based encoder. 548 lzma_lz_options lz_options; 549 return_if_error(lz_init(&next->coder->lz, allocator, 550 filters[0].options, &lz_options)); 551 552 // Setup the size information into next->coder->mf and deallocate 553 // old buffers if they have wrong size. 554 if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options)) 555 return LZMA_OPTIONS_ERROR; 556 557 // Allocate new buffers if needed, and do the rest of 558 // the initialization. 559 if (lz_encoder_init(&next->coder->mf, allocator, &lz_options)) 560 return LZMA_MEM_ERROR; 561 562 // Initialize the next filter in the chain, if any. 563 return lzma_next_filter_init(&next->coder->next, allocator, 564 filters + 1); 565 } 566 567 568 extern LZMA_API(lzma_bool) 569 lzma_mf_is_supported(lzma_match_finder mf) 570 { 571 bool ret = false; 572 573 #ifdef HAVE_MF_HC3 574 if (mf == LZMA_MF_HC3) 575 ret = true; 576 #endif 577 578 #ifdef HAVE_MF_HC4 579 if (mf == LZMA_MF_HC4) 580 ret = true; 581 #endif 582 583 #ifdef HAVE_MF_BT2 584 if (mf == LZMA_MF_BT2) 585 ret = true; 586 #endif 587 588 #ifdef HAVE_MF_BT3 589 if (mf == LZMA_MF_BT3) 590 ret = true; 591 #endif 592 593 #ifdef HAVE_MF_BT4 594 if (mf == LZMA_MF_BT4) 595 ret = true; 596 #endif 597 598 return ret; 599 } 600