/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 */

#include "zstd_ldm.h"

#include "debug.h"
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (cParams->strategy >= ZSTD_btopt) {
        /* Get out of the way of the optimal parser */
        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
        params->minMatchLength = minMatch;
    }
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashEveryLog == 0) {
        params->hashEveryLog = params->windowLog < params->hashLog
                                   ? 0
                                   : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize =
        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
    return params.enableLdm ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}

/** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}

/** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}
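/* The getSmallHash/getChecksum/getTag helpers slice the 64-bit rolling hash
 * from the most significant end down (a summary, not additional API):
 *
 *   top hBits bits          : small hash, selects the bucket
 *   next 32 bits            : checksum, stored in the ldmEntry_t
 *   next hashEveryLog bits  : tag, used to subsample insert positions
 *
 * For instance, with hashLog=20 and bucketSizeLog=3, hBits = 20-3 = 17, so
 * the checksum occupies bits 46..15 and a 7-bit tag (hashEveryLog=7)
 * occupies bits 14..8. */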
/** ZSTD_ldm_getTag() :
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
}

/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}

/** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashEveryLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
{
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
}

/** ZSTD_ldm_getRollingHash() :
 *  Get a 64-bit hash using the first len bytes from buf.
 *
 *  Given bytes s = s_1, s_2, ... s_k, the hash is defined to be
 *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
 *
 *  where the constant a is defined to be prime8bytes.
 *
 *  The implementation adds an offset to each byte, so
 *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
{
    U64 ret = 0;
    U32 i;
    for (i = 0; i < len; i++) {
        ret *= prime8bytes;
        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
    }
    return ret;
}

/** ZSTD_ldm_ipow() :
 *  Return base^exp. */
static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
{
    U64 ret = 1;
    while (exp) {
        if (exp & 1) { ret *= base; }
        exp >>= 1;
        base *= base;
    }
    return ret;
}

U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
    DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
}
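/* Why hashPower is a^(minMatchLength-1): the polynomial form above gives,
 * with off = LDM_HASH_CHAR_OFFSET and a = prime8bytes (all arithmetic
 * mod 2^64),
 *
 *   H(s_1..s_k)     = (s_1+off)*a^(k-1) + ... + (s_k+off)*a^0
 *   H(s_2..s_{k+1}) = (H(s_1..s_k) - (s_1+off)*a^(k-1)) * a + (s_{k+1}+off)
 *
 * so sliding the window by one byte needs only the cached power a^(k-1),
 * which is exactly what ZSTD_ldm_updateHash() below computes. The
 * subtraction is exact even when intermediate products wrap. */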
/** ZSTD_ldm_updateHash() :
 *  Updates hash by removing toRemove and adding toAdd. */
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
{
    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
    hash *= prime8bytes;
    hash += toAdd + LDM_HASH_CHAR_OFFSET;
    return hash;
}

/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

/** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
{
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
                                          cur[ldmParams.minMatchLength-1],
                                          state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
}
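/* A small self-check for the rolling-update identity above (illustrative
 * only; this hypothetical helper is not part of the build or of zstd's
 * API): hashing a window shifted by one byte must agree with one call to
 * ZSTD_ldm_updateHash(). buf must hold at least k+1 bytes. */
#if 0
static void ZSTD_ldm_selfTestRollingHash(const BYTE* buf, U32 k)
{
    U64 const power = ZSTD_ldm_ipow(prime8bytes, k - 1);
    U64 hash = ZSTD_ldm_getRollingHash(buf, k);           /* H(s_1..s_k) */
    hash = ZSTD_ldm_updateHash(hash, buf[0], buf[k], power);
    assert(hash == ZSTD_ldm_getRollingHash(buf + 1, k));  /* H(s_2..s_{k+1}) */
}
#endif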
/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
 *  if it is far away
 *  (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const current = (U32)(anchor - ms->window.base);
    if (current > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            current - MIN(512, current - ms->nextToUpdate - 1024);
    }
}

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U64 const hashPower = ldmState->hashPower;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
    U32 const hashEveryLog = params->hashEveryLog;
    U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash */
    BYTE const* lastHashed = NULL;
    U64 rollingHash = 0;

    while (ip <= ilimit) {
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                              lastHashed[minMatchLength],
                                              hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
            ip++;
            continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   *params);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
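                /* A candidate may lie in the extDict segment (the part of
                 * the window before dictLimit, held in a separate buffer) or
                 * in the current prefix. In the extDict case the forward
                 * match may run off the old segment into the prefix, hence
                 * ZSTD_count_2segments() below; the backward extension is
                 * bounded so it never crosses the segment's lower limit. */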
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                    curForwardMatchLength = ZSTD_count_2segments(
                                                ip, pMatch, iend,
                                                matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowMatchPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowPrefixPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                }

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             *params);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        {
            /* Store the sequence:
             * ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength)
             */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;
            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

            /* Out of sequence storage */
            if (rawSeqStore->size == rawSeqStore->capacity)
                return ERROR(dstSize_tooSmall);
            seq->litLength = (U32)(ip - anchor);
            seq->matchLength = (U32)mLength;
            seq->offset = offset;
            rawSeqStore->size++;
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         *params);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength <= ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, *params);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
    }
    return iend - anchor;
}

/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}
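/* What a rawSeq means to downstream consumers: emit litLength literals,
 * then copy matchLength bytes starting offset bytes back in the output.
 * A hypothetical decoder for one sequence (illustrative only, not part of
 * zstd's API; assumes out already holds the data the offset refers to): */
#if 0
static size_t rawSeq_apply(BYTE* out, size_t pos, const BYTE* lits, rawSeq const seq)
{
    size_t i;
    for (i = 0; i < seq.litLength; ++i)    /* literal run */
        out[pos + i] = lits[i];
    pos += seq.litLength;
    for (i = 0; i < seq.matchLength; ++i)  /* overlap-safe back-reference copy */
        out[pos + i] = out[pos + i - seq.offset];
    return pos + seq.matchLength;
}
#endif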
size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const* const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, src);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test
         *         the offset against maxDist directly.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}

void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}
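/* Worked example for ZSTD_ldm_skipSequences(): given a head sequence of
 * { litLength = 100, matchLength = 200, offset = 1000 } and srcSize = 150,
 * the 100 literals are consumed first, then 50 bytes of the match, leaving
 * { litLength = 0, matchLength = 150, offset = 1000 } with pos unchanged.
 * Had the remaining match dropped below minMatch, it would instead be
 * folded into the next sequence's literals and pos advanced past it. */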
/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->searchLength;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the lits */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(sequence.offset <= (1U << cParams->windowLog));
        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}
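/* Sketch of how the pieces above fit together (a hypothetical driver,
 * simplified from zstd's internal call sites; illustrative only): first
 * generate long-distance sequences for the input, then let
 * ZSTD_ldm_blockCompress() interleave them with the regular block
 * compressor. */
#if 0
static size_t ldm_compressBlock_sketch(ldmState_t* ldmState,
                                       rawSeqStore_t* rawSeqStore,
                                       ldmParams_t const* params,
                                       ZSTD_matchState_t* ms,
                                       seqStore_t* seqStore,
                                       U32 rep[ZSTD_REP_NUM],
                                       void const* src, size_t srcSize)
{
    size_t const err = ZSTD_ldm_generateSequences(ldmState, rawSeqStore,
                                                  params, src, srcSize);
    if (ZSTD_isError(err)) return err;
    /* Emits literals via the nested block compressor and the LDM matches
     * via ZSTD_storeSeq(); returns the size of the last literals. */
    return ZSTD_ldm_blockCompress(rawSeqStore, ms, seqStore, rep,
                                  src, srcSize);
}
#endif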