/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 */

#include "zstd_ldm.h"

#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10

size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
{
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    params->enableLdm = enableLdm > 0;
    params->hashLog = 0;
    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
    return 0;
}

void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
{
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
        params->hashEveryLog =
            windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}

size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
    size_t const ldmHSize = ((size_t)1) << hashLog;
    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
    size_t const ldmBucketSize =
        ((size_t)1) << (hashLog - ldmBucketSizeLog);
    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
}

/** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}

/** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}

/** ZSTD_ldm_getTag() :
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
}
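
/* Illustrative sketch : how the three extractors above partition a 64-bit
 * rolling hash, assuming hBits <= 32 and 32 - hBits >= numTagBits so the
 * truncation path in ZSTD_ldm_getTag() is not taken :
 *
 *   bit 63                                                        bit 0
 *   [ hBits small hash | 32-bit checksum | numTagBits tag | unused ]
 *
 * E.g. with hBits = 17 (hashLog 20 minus bucketSizeLog 3) and
 * numTagBits = hashEveryLog = 7 :
 *   ZSTD_ldm_getSmallHash(h, 17)  == h >> 47
 *   ZSTD_ldm_getChecksum(h, 17)   == (h >> 15) & 0xFFFFFFFF
 *   ZSTD_ldm_getTag(h, 17, 7)     == (h >> 8) & 0x7F
 */
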
/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}

/** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashEveryLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
{
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
}

/** ZSTD_ldm_getRollingHash() :
 *  Get a 64-bit hash using the first len bytes from buf.
 *
 *  Given bytes s = s_1, s_2, ... s_k, the hash is defined to be
 *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
 *
 *  where the constant a is defined to be prime8bytes.
 *
 *  The implementation adds an offset to each byte, so
 *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
{
    U64 ret = 0;
    U32 i;
    for (i = 0; i < len; i++) {
        ret *= prime8bytes;
        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
    }
    return ret;
}

/** ZSTD_ldm_ipow() :
 *  Return base^exp. */
static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
{
    U64 ret = 1;
    while (exp) {
        if (exp & 1) { ret *= base; }
        exp >>= 1;
        base *= base;
    }
    return ret;
}

U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
}

/** ZSTD_ldm_updateHash() :
 *  Updates hash by removing toRemove and adding toAdd. */
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
{
    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
    hash *= prime8bytes;
    hash += toAdd + LDM_HASH_CHAR_OFFSET;
    return hash;
}
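
/* Illustrative sketch : the rolling update above follows from the polynomial
 * form of H. With a = prime8bytes, c = LDM_HASH_CHAR_OFFSET, and a window
 * s_1..s_k of k = minMatchLength bytes :
 *
 *   H(s_1..s_k)     = (s_1+c)*a^(k-1) + (s_2+c)*a^(k-2) + ... + (s_k+c)
 *   H(s_2..s_(k+1)) = (H(s_1..s_k) - (s_1+c)*a^(k-1)) * a + (s_(k+1)+c)
 *
 * hashPower caches a^(k-1) (see ZSTD_ldm_getHashPower()), so sliding the
 * window by one byte costs one multiply, one subtract and one add, all
 * modulo 2^64 through ordinary U64 wraparound. */
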
/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
{
    const BYTE* const iend = (const BYTE*)end;
    const U32 mls = zc->appliedParams.cParams.searchLength;

    switch(zc->appliedParams.cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(zc, iend, mls);
        zc->nextToUpdate = (U32)(iend - zc->base);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(zc, iend, mls);
        zc->nextToUpdate = (U32)(iend - zc->base);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

/** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
{
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
                                          cur[ldmParams.minMatchLength-1],
                                          state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
}
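
/* Illustrative note : since ZSTD_ldm_makeEntryAndInsertByTag() only accepts
 * positions whose tag bits are all 1, the loop above stores roughly one
 * candidate per 2^hashEveryLog positions. E.g. with windowLog = 27 and
 * hashLog = 20, ZSTD_ldm_adjustParameters() picks hashEveryLog = 7, so about
 * one position in 128 is remembered, which keeps table maintenance cheap
 * across large windows. */
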
/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets cctx->nextToUpdate to a position closer to anchor if it has fallen
 *  far behind (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
{
    U32 const current = (U32)(anchor - cctx->base);
    if (current > cctx->nextToUpdate + 1024) {
        cctx->nextToUpdate =
            current - MIN(512, current - cctx->nextToUpdate - 1024);
    }
}

typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
/* defined in zstd_compress.c */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
                                      const void* src, size_t srcSize)
{
    ldmState_t* const ldmState = &(cctx->ldmState);
    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
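
    /* Illustrative outline of the search loop below :
     *   1. advance the rolling hash by one byte (or seed it at istart),
     *   2. skip positions whose tag != ldmTagMask (subsampling),
     *   3. scan one bucket for entries with a matching checksum, extending
     *      candidates forwards (ZSTD_count) and backwards
     *      (ZSTD_ldm_countBackwardsMatch),
     *   4. on a miss, insert the position and advance by one byte,
     *   5. on a hit, compress the gap literals with the nested block
     *      compressor, emit the long-distance sequence, re-fill the LDM hash
     *      table across the match, then check for immediate repcodes. */
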
    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                              lastHashed[ldmParams.minMatchLength],
                                              hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
            ip++;
            continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                const BYTE* const pMatch = cur->offset + base;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowest);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        /* Call the block compressor on the remaining literals */
        {
            U32 const matchIndex = bestEntry->offset;
            const BYTE* const match = base + matchIndex - backwardMatchLength;
            U32 const offset = (U32)(ip - match);

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill tables for block compressor */
            ZSTD_ldm_limitTableUpdate(cctx, anchor);
            ZSTD_ldm_fillFastTables(cctx, anchor);

            /* Call block compressor and get remaining literals */
            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
            cctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
        /* Check immediate repcode */
        while ( (ip < ilimit)
             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {

            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
                                              iend) + 4;
            /* Swap repToConfirm[1] <=> repToConfirm[0] */
            {
                U32 const tmpOff = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOff;
            }

            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);

            /* Fill the hash table from lastHashed+1 to ip+rLength */
            if (ip + rLength < ilimit) {
                rollingHash = ZSTD_ldm_fillLdmHashTable(
                                  ldmState, rollingHash, lastHashed,
                                  ip + rLength, base, hBits, ldmParams);
                lastHashed = ip + rLength - 1;
            }
            ip += rLength;
            anchor = ip;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(cctx, anchor);
    ZSTD_ldm_fillFastTables(cctx, anchor);

    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
    cctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
}
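
/* Hypothetical usage sketch (illustrative only; the real call site is the
 * block-compression logic in zstd_compress.c) : this entry point is invoked
 * like any other ZSTD_blockCompressor, e.g. :
 *
 *     size_t const lastLLSize = ZSTD_compressBlock_ldm(cctx, src, blockSize);
 *
 * after which the sequences are in cctx->seqStore and lastLLSize trailing
 * literal bytes remain for the caller to emit. */
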
size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
                              const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
}

static size_t ZSTD_compressBlock_ldm_extDict_generic(
                                 ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize)
{
    ldmState_t* const ldmState = &(ctx->ldmState);
    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32 dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++) {
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
    }

    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because (ip+1) */
        size_t mLength;
        const U32 current = (U32)(ip-base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                              lastHashed[ldmParams.minMatchLength],
                                              hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
            /* Don't insert and don't look for a match */
            ip++;
            continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
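
            /* Illustrative note : in extDict mode a stored offset may land
             * either in the dictionary segment (cur->offset < dictLimit,
             * addressed via dictBase) or in the current prefix (addressed
             * via base). The ternaries below pick the matching segment's
             * base, end and floor, so forward extension can continue across
             * the segment boundary via ZSTD_count_2segments(). */
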
            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                const BYTE* const curMatchBase =
                    cur->offset < dictLimit ? dictBase : base;
                const BYTE* const pMatch = curMatchBase + cur->offset;
                const BYTE* const matchEnd =
                    cur->offset < dictLimit ? dictEnd : iend;
                const BYTE* const lowMatchPtr =
                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;

                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count_2segments(
                                            ip, pMatch, iend,
                                            matchEnd, lowPrefixPtr);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowMatchPtr);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                             (U32)(lastHashed - base),
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        /* Call the block compressor on the remaining literals */
        {
            /* ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength) */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill the hash table for the block compressor */
            ZSTD_ldm_limitTableUpdate(ctx, anchor);
            ZSTD_ldm_fillFastTables(ctx, anchor);

            /* Call block compressor and get remaining literals */
            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
            ctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        assert(ip + backwardMatchLength == lastHashed);
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits,
                              ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
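
        /* Illustrative note : the test below mirrors the non-extDict repcode
         * check, with two extDict twists. repMatch2 is resolved against
         * dictBase when it falls inside the dictionary segment, and the
         * (U32)((dictLimit-1) - repIndex2) >= 3 guard relies on intentional
         * unsigned wraparound : it rejects repIndex2 in
         * [dictLimit-3, dictLimit-1], where a 4-byte read at repMatch2 would
         * run past dictEnd, while any repIndex2 >= dictLimit wraps to a huge
         * value and passes. */
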
        /* check immediate repcode */
        while (ip < ilimit) {
            U32 const current2 = (U32)(ip-base);
            U32 const repIndex2 = current2 - repToConfirm[1];
            const BYTE* repMatch2 = repIndex2 < dictLimit ?
                                    dictBase + repIndex2 : base + repIndex2;
            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
                        (repIndex2 > lowestIndex))  /* intentional overflow */
               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
                                            dictEnd : iend;
                size_t const repLength2 =
                         ZSTD_count_2segments(ip+4, repMatch2+4, iend,
                                              repEnd2, lowPrefixPtr) + 4;

                U32 tmpOffset = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOffset;

                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);

                /* Fill the hash table from lastHashed+1 to ip+repLength2 */
                if (ip + repLength2 < ilimit) {
                    rollingHash = ZSTD_ldm_fillLdmHashTable(
                                      ldmState, rollingHash, lastHashed,
                                      ip + repLength2, base, hBits,
                                      ldmParams);
                    lastHashed = ip + repLength2 - 1;
                }
                ip += repLength2;
                anchor = ip;
                continue;
            }
            break;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(ctx, anchor);
    ZSTD_ldm_fillFastTables(ctx, anchor);

    /* Call the block compressor one last time on the last literals */
    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
    ctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
}

size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
}
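
/* Illustrative arithmetic (a sketch, assuming ldmEntry_t is two U32 fields,
 * i.e. 8 bytes) : with hashLog = 20 and bucketSizeLog = LDM_BUCKET_SIZE_LOG
 * = 3, ZSTD_ldm_getTableSize() reserves
 *     (1 << (20-3))   =  131072 bytes of bucket offsets, plus
 *     (1 << 20) * 8   = 8388608 bytes of entries,
 * totalling 8519680 bytes (about 8.1 MiB), organised as 2^17 buckets of
 * 2^3 = 8 entries each. */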