///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_encoder.c
/// \brief      LZ in window
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "lz_encoder.h"
#include "lz_encoder_hash.h"

// See lz_encoder_hash.h. This is a bit hackish but avoids making
// endianness a conditional in makefiles.
#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
#	include "lz_encoder_hash_table.h"
#endif


struct lzma_coder_s {
	/// LZ-based encoder e.g. LZMA
	lzma_lz_encoder lz;

	/// History buffer and match finder
	lzma_mf mf;

	/// Next coder in the chain
	lzma_next_coder next;
};


/// \brief      Moves the data in the input window to free space for new data
///
/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
/// bytes of input history available all the time. Now and then we need to
/// "slide" the buffer to make space for new data at the end of the buffer.
/// At the same time, data older than keep_size_before is dropped.
///
static void
move_window(lzma_mf *mf)
{
	// Align the move to a multiple of 16 bytes. Some LZ-based encoders
	// like LZMA use the lowest bits of mf->read_pos to know the
	// alignment of the uncompressed data. We also get better speed
	// for memmove() with aligned buffers.
	assert(mf->read_pos > mf->keep_size_before);
	const uint32_t move_offset
		= (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);

	assert(mf->write_pos > move_offset);
	const size_t move_size = mf->write_pos - move_offset;

	assert(move_offset + move_size <= mf->size);

	memmove(mf->buffer, mf->buffer + move_offset, move_size);

	mf->offset += move_offset;
	mf->read_pos -= move_offset;
	mf->read_limit -= move_offset;
	mf->write_pos -= move_offset;

	return;
}
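
// A worked example of the alignment arithmetic above, with made-up
// numbers (none of these come from a real stream): if keep_size_before
// is 900 and read_pos is 1000, then read_pos - keep_size_before is 100,
// and masking with ~15 rounds it down to move_offset = 96. After the
// move, read_pos becomes 904; 1000 and 904 are congruent modulo 16, so
// the low bits of read_pos that alignment-sensitive encoders like LZMA
// look at are preserved across the move.
/*
	assert(((UINT32_C(1000) - 900) & ~UINT32_C(15)) == 96);
	assert(1000 % 16 == 904 % 16);
*/
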
/// \brief      Tries to fill the input window (mf->buffer)
///
/// If we are the last encoder in the chain, our input data is in in[].
/// Otherwise we call the next filter in the chain to process in[] and
/// write its output to mf->buffer.
///
/// This function must not be called once it has returned LZMA_STREAM_END.
///
static lzma_ret
fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
		size_t *in_pos, size_t in_size, lzma_action action)
{
	assert(coder->mf.read_pos <= coder->mf.write_pos);

	// Move the sliding window if needed.
	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
		move_window(&coder->mf);

	// Maybe this is ugly, but lzma_mf uses uint32_t for most things
	// (which I find cleanest), but we need size_t here when filling
	// the history window.
	size_t write_pos = coder->mf.write_pos;
	lzma_ret ret;
	if (coder->next.code == NULL) {
		// Not using a filter, simply memcpy() as much as possible.
		lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
				&write_pos, coder->mf.size);

		ret = action != LZMA_RUN && *in_pos == in_size
				? LZMA_STREAM_END : LZMA_OK;

	} else {
		ret = coder->next.code(coder->next.coder, allocator,
				in, in_pos, in_size,
				coder->mf.buffer, &write_pos,
				coder->mf.size, action);
	}

	coder->mf.write_pos = write_pos;

	// If end of stream has been reached or flushing completed, we allow
	// the encoder to process all the input (that is, read_pos is allowed
	// to reach write_pos). Otherwise we keep keep_size_after bytes
	// available as a prebuffer.
	if (ret == LZMA_STREAM_END) {
		assert(*in_pos == in_size);
		ret = LZMA_OK;
		coder->mf.action = action;
		coder->mf.read_limit = coder->mf.write_pos;

	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
		// This needs to be done conditionally, because if we got
		// only a little new input, there may be too little input
		// to do any encoding yet.
		coder->mf.read_limit = coder->mf.write_pos
				- coder->mf.keep_size_after;
	}

	// Restart the match finder after finished LZMA_SYNC_FLUSH.
	if (coder->mf.pending > 0
			&& coder->mf.read_pos < coder->mf.read_limit) {
		// The match finder may update coder->mf.pending and expects
		// it to start from zero, so use a temporary variable.
		const size_t pending = coder->mf.pending;
		coder->mf.pending = 0;

		// Rewind read_pos so that the match finder can hash
		// the pending bytes.
		assert(coder->mf.read_pos >= pending);
		coder->mf.read_pos -= pending;

		// Call the skip function directly instead of using
		// mf_skip(), since we don't want to touch mf->read_ahead.
		coder->mf.skip(&coder->mf, pending);
	}

	return ret;
}


static lzma_ret
lz_encode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size, lzma_action action)
{
	while (*out_pos < out_size
			&& (*in_pos < in_size || action != LZMA_RUN)) {
		// Read more data into coder->mf.buffer if needed.
		if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
				>= coder->mf.read_limit)
			return_if_error(fill_window(coder, allocator,
					in, in_pos, in_size, action));

		// Encode
		const lzma_ret ret = coder->lz.code(coder->lz.coder,
				&coder->mf, out, out_pos, out_size);
		if (ret != LZMA_OK) {
			// Setting this to LZMA_RUN for cases when we are
			// flushing. It doesn't matter when finishing or if
			// an error occurred.
			coder->mf.action = LZMA_RUN;
			return ret;
		}
	}

	return LZMA_OK;
}
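
// A sketch of the sync-flush restart in fill_window() above, with
// made-up numbers (the exact reason pending accrues lives in
// lz_encoder_mf.c): suppose an LZMA_SYNC_FLUSH left mf.pending == 2
// because the last two positions before the flush point had too little
// lookahead to be hashed yet. Once new input arrives and
// read_pos < read_limit again, read_pos is rewound by two and
// mf.skip(&mf, 2) hashes exactly those two positions, after which
// encoding continues as if the flush had never split the input.
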
static bool
lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// For now, the dictionary size is limited to 1.5 GiB. This may grow
	// in the future if needed, but it needs a little more work than just
	// changing this check.
	if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
			|| lz_options->dict_size
				> (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
			|| lz_options->nice_len > lz_options->match_len_max)
		return true;

	mf->keep_size_before = lz_options->before_size + lz_options->dict_size;

	mf->keep_size_after = lz_options->after_size
			+ lz_options->match_len_max;

	// To avoid constant memmove()s, allocate some extra space. Since
	// memmove()s become more expensive when the size of the buffer
	// increases, we reserve more space when a large dictionary is
	// used to make the memmove() calls rarer.
	//
	// This works with dictionaries up to about 3 GiB. If a bigger
	// dictionary is wanted, some extra work is needed:
	//   - Several variables in lzma_mf have to be changed from
	//     uint32_t to size_t.
	//   - The memory usage calculation needs updating too, e.g. by
	//     using uint64_t for mf->size.
	uint32_t reserve = lz_options->dict_size / 2;
	if (reserve > (UINT32_C(1) << 30))
		reserve /= 2;

	reserve += (lz_options->before_size + lz_options->match_len_max
			+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);

	const uint32_t old_size = mf->size;
	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;

	// Deallocate the old history buffer if it exists but has a
	// different size than what is needed now.
	if (mf->buffer != NULL && old_size != mf->size) {
		lzma_free(mf->buffer, allocator);
		mf->buffer = NULL;
	}
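
	// A worked example of the sizing above with made-up option values:
	// with dict_size = 64 MiB, before_size = 0, after_size = 0 and
	// match_len_max = 273, reserve starts as 32 MiB (below the 1 GiB
	// threshold, so it is not halved) and then grows by
	// 273 / 2 + 512 KiB. keep_size_before is 64 MiB and keep_size_after
	// is 273 bytes, so mf->size ends up at roughly 96.5 MiB, i.e. about
	// 1.5 times the dictionary size.
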
	// Match finder options
	mf->match_len_max = lz_options->match_len_max;
	mf->nice_len = lz_options->nice_len;

	// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
	// mean limiting the dictionary size to less than 2 GiB. With a match
	// finder that uses multibyte resolution (hashes start at e.g. every
	// fourth byte), cyclic_size would stay below 2 Gi even when the
	// dictionary size is greater than 2 GiB.
	//
	// It would be possible to allow cyclic_size >= 2 Gi, but then we
	// would need to be careful to use 64-bit types in various places
	// (size_t could do since we would need a bigger than 32-bit address
	// space anyway). It would also require either zeroing a multigigabyte
	// buffer at initialization (a waste of time and RAM) or allowing
	// normalization in lz_encoder_mf.c to access uninitialized memory
	// to keep the code simpler. The current way is simple and still
	// allows pretty big dictionaries, so I don't expect these limits
	// to change.
	mf->cyclic_size = lz_options->dict_size + 1;

	// Validate the match finder ID and set up the function pointers.
	switch (lz_options->match_finder) {
#ifdef HAVE_MF_HC3
	case LZMA_MF_HC3:
		mf->find = &lzma_mf_hc3_find;
		mf->skip = &lzma_mf_hc3_skip;
		break;
#endif
#ifdef HAVE_MF_HC4
	case LZMA_MF_HC4:
		mf->find = &lzma_mf_hc4_find;
		mf->skip = &lzma_mf_hc4_skip;
		break;
#endif
#ifdef HAVE_MF_BT2
	case LZMA_MF_BT2:
		mf->find = &lzma_mf_bt2_find;
		mf->skip = &lzma_mf_bt2_skip;
		break;
#endif
#ifdef HAVE_MF_BT3
	case LZMA_MF_BT3:
		mf->find = &lzma_mf_bt3_find;
		mf->skip = &lzma_mf_bt3_skip;
		break;
#endif
#ifdef HAVE_MF_BT4
	case LZMA_MF_BT4:
		mf->find = &lzma_mf_bt4_find;
		mf->skip = &lzma_mf_bt4_skip;
		break;
#endif

	default:
		return true;
	}

	// Calculate the sizes of mf->hash and mf->son and check that
	// nice_len is big enough for the selected match finder.
	const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
	if (hash_bytes > mf->nice_len)
		return true;

	const bool is_bt = (lz_options->match_finder & 0x10) != 0;
	uint32_t hs;

	if (hash_bytes == 2) {
		hs = 0xFFFF;
	} else {
		// Round dictionary size up to the next 2^n - 1 so it can
		// be used as a hash mask.
		hs = lz_options->dict_size - 1;
		hs |= hs >> 1;
		hs |= hs >> 2;
		hs |= hs >> 4;
		hs |= hs >> 8;
		hs >>= 1;
		hs |= 0xFFFF;

		if (hs > (UINT32_C(1) << 24)) {
			if (hash_bytes == 3)
				hs = (UINT32_C(1) << 24) - 1;
			else
				hs >>= 1;
		}
	}

	mf->hash_mask = hs;

	++hs;
	if (hash_bytes > 2)
		hs += HASH_2_SIZE;
	if (hash_bytes > 3)
		hs += HASH_3_SIZE;
	/*
	No match finder uses this at the moment.
	if (mf->hash_bytes > 4)
		hs += HASH_4_SIZE;
	*/

	// If the above code calculating hs is modified, make sure that
	// this assertion stays valid (UINT32_MAX / 5 is not strictly the
	// exact limit). If it doesn't, you need to show that
	// hash_size_sum + sons_count cannot overflow.
	assert(hs < UINT32_MAX / 5);

	const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
	mf->hash_size_sum = hs;
	mf->sons_count = mf->cyclic_size;
	if (is_bt)
		mf->sons_count *= 2;

	const uint32_t new_count = mf->hash_size_sum + mf->sons_count;

	// Deallocate the old hash array if it exists and has a different
	// size than what is needed now.
	if (old_count != new_count) {
		lzma_free(mf->hash, allocator);
		mf->hash = NULL;
	}

	// Maximum number of match finder cycles
	mf->depth = lz_options->depth;
	if (mf->depth == 0) {
		if (is_bt)
			mf->depth = 16 + mf->nice_len / 2;
		else
			mf->depth = 4 + mf->nice_len / 4;
	}

	return false;
}
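
// A worked example of the hash mask rounding in lz_encoder_prepare(),
// using a made-up dictionary size: with dict_size = 1 MiB and a 4-byte
// hash (e.g. LZMA_MF_BT4), hs starts as 0xFFFFF; the shift-or cascade
// leaves it at 0xFFFFF since it is already of the form 2^n - 1; the
// final hs >>= 1 gives 0x7FFFF and OR-ing with 0xFFFF changes nothing.
// So hash_mask = 0x7FFFF and the 4-byte hash table gets 0x80000 entries,
// on top of the fixed HASH_2_SIZE and HASH_3_SIZE tables.
/*
	uint32_t hs = (UINT32_C(1) << 20) - 1;  // dict_size - 1
	hs |= hs >> 1;
	hs |= hs >> 2;
	hs |= hs >> 4;
	hs |= hs >> 8;
	hs >>= 1;
	hs |= 0xFFFF;
	assert(hs == 0x7FFFF);
*/
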
static bool
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
		const lzma_lz_options *lz_options)
{
	// Allocate the history buffer.
	if (mf->buffer == NULL) {
		mf->buffer = lzma_alloc(mf->size, allocator);
		if (mf->buffer == NULL)
			return true;
	}

	// Use cyclic_size as the initial mf->offset. This allows
	// avoiding a few branches in the match finders. The downside is
	// that the match finder needs to be normalized more often, which
	// may hurt performance with huge dictionaries.
	mf->offset = mf->cyclic_size;
	mf->read_pos = 0;
	mf->read_ahead = 0;
	mf->read_limit = 0;
	mf->write_pos = 0;
	mf->pending = 0;

	// Allocate the match finder's hash array.
	const size_t alloc_count = mf->hash_size_sum + mf->sons_count;

#if UINT32_MAX >= SIZE_MAX / 4
	// Check for integer overflow. (Huge dictionaries are not
	// possible on 32-bit CPUs.)
	if (alloc_count > SIZE_MAX / sizeof(uint32_t))
		return true;
#endif

	if (mf->hash == NULL) {
		mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
				allocator);
		if (mf->hash == NULL)
			return true;
	}

	mf->son = mf->hash + mf->hash_size_sum;
	mf->cyclic_pos = 0;

	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
	// can use memset().
	/*
	for (uint32_t i = 0; i < hash_size_sum; ++i)
		mf->hash[i] = EMPTY_HASH_VALUE;
	*/
	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));

	// We don't need to initialize mf->son, but not doing that will
	// make Valgrind complain in normalization (see normalize() in
	// lz_encoder_mf.c).
	//
	// Skipping this initialization is *very* good when a big dictionary
	// is used but only a small amount of data gets actually compressed:
	// most of the mf->hash won't get actually allocated by the kernel,
	// so we avoid wasting RAM and improve initialization speed a lot.
	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));

	// Handle the preset dictionary.
	if (lz_options->preset_dict != NULL
			&& lz_options->preset_dict_size > 0) {
		// If the preset dictionary is bigger than the actual
		// dictionary, use only the tail.
		mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
		memcpy(mf->buffer, lz_options->preset_dict
				+ lz_options->preset_dict_size - mf->write_pos,
				mf->write_pos);
		mf->action = LZMA_SYNC_FLUSH;
		mf->skip(mf, mf->write_pos);
	}

	mf->action = LZMA_RUN;

	return false;
}


extern uint64_t
lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
{
	// Old buffers must not exist when calling lz_encoder_prepare().
	lzma_mf mf = {
		.buffer = NULL,
		.hash = NULL,
		.hash_size_sum = 0,
		.sons_count = 0,
	};

	// Set up the size information in mf.
	if (lz_encoder_prepare(&mf, NULL, lz_options))
		return UINT64_MAX;

	// Calculate the memory usage.
	return (uint64_t)(mf.hash_size_sum + mf.sons_count)
			* sizeof(uint32_t)
			+ (uint64_t)(mf.size) + sizeof(lzma_coder);
}


static void
lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_end(&coder->next, allocator);

	lzma_free(coder->mf.hash, allocator);
	lzma_free(coder->mf.buffer, allocator);

	if (coder->lz.end != NULL)
		coder->lz.end(coder->lz.coder, allocator);
	else
		lzma_free(coder->lz.coder, allocator);

	lzma_free(coder, allocator);
	return;
}


static lzma_ret
lz_encoder_update(lzma_coder *coder, lzma_allocator *allocator,
		const lzma_filter *filters_null lzma_attribute((__unused__)),
		const lzma_filter *reversed_filters)
{
	if (coder->lz.options_update == NULL)
		return LZMA_PROG_ERROR;

	return_if_error(coder->lz.options_update(
			coder->lz.coder, reversed_filters));

	return lzma_next_filter_update(
			&coder->next, allocator, reversed_filters + 1);
}
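
// The split between lz_encoder_prepare() and lz_encoder_init() above is
// what keeps lzma_lz_encoder_memusage() cheap: prepare() only computes
// sizes into a stack-allocated lzma_mf, and init() is the phase that
// actually allocates. As a rough worked example with made-up numbers, a
// BT4 match finder with a 64 MiB dictionary needs about 96.5 MiB for
// mf->buffer (see the sizing example in lz_encoder_prepare()) plus
// (hash_size_sum + sons_count) * 4 bytes, where sons_count alone is
// 2 * cyclic_size = 2 * (64 Mi + 1) entries, i.e. roughly 512 MiB of
// uint32_t's on top of the hash tables.
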
extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters,
		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
			lzma_allocator *allocator, const void *options,
			lzma_lz_options *lz_options))
{
#ifdef HAVE_SMALL
	// Make sure that the CRC32 table has been initialized.
	lzma_crc32_init();
#endif

	// Allocate and initialize the base data structure.
	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &lz_encode;
		next->end = &lz_encoder_end;
		next->update = &lz_encoder_update;

		next->coder->lz.coder = NULL;
		next->coder->lz.code = NULL;
		next->coder->lz.end = NULL;

		next->coder->mf.buffer = NULL;
		next->coder->mf.hash = NULL;
		next->coder->mf.hash_size_sum = 0;
		next->coder->mf.sons_count = 0;

		next->coder->next = LZMA_NEXT_CODER_INIT;
	}

	// Initialize the LZ-based encoder.
	lzma_lz_options lz_options;
	return_if_error(lz_init(&next->coder->lz, allocator,
			filters[0].options, &lz_options));

	// Set up the size information in next->coder->mf and deallocate
	// old buffers if they have the wrong size.
	if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options))
		return LZMA_OPTIONS_ERROR;

	// Allocate new buffers if needed, and do the rest of
	// the initialization.
	if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
		return LZMA_MEM_ERROR;

	// Initialize the next filter in the chain, if any.
	return lzma_next_filter_init(&next->coder->next, allocator,
			filters + 1);
}


extern LZMA_API(lzma_bool)
lzma_mf_is_supported(lzma_match_finder mf)
{
	bool ret = false;

#ifdef HAVE_MF_HC3
	if (mf == LZMA_MF_HC3)
		ret = true;
#endif

#ifdef HAVE_MF_HC4
	if (mf == LZMA_MF_HC4)
		ret = true;
#endif

#ifdef HAVE_MF_BT2
	if (mf == LZMA_MF_BT2)
		ret = true;
#endif

#ifdef HAVE_MF_BT3
	if (mf == LZMA_MF_BT3)
		ret = true;
#endif

#ifdef HAVE_MF_BT4
	if (mf == LZMA_MF_BT4)
		ret = true;
#endif

	return ret;
}
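
// A minimal usage sketch for the public lzma_mf_is_supported() above
// (illustrative only; it assumes an application that links against
// liblzma and includes <lzma.h>):
/*
	#include <lzma.h>
	#include <stdio.h>

	int
	main(void)
	{
		// Ask which match finders this liblzma build supports.
		const lzma_match_finder ids[] = {
			LZMA_MF_HC3, LZMA_MF_HC4,
			LZMA_MF_BT2, LZMA_MF_BT3, LZMA_MF_BT4,
		};

		for (size_t i = 0; i < sizeof(ids) / sizeof(ids[0]); ++i)
			printf("0x%02X: %s\n", (unsigned)ids[i],
					lzma_mf_is_supported(ids[i])
					? "supported" : "not built in");

		return 0;
	}
*/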