// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_lazy.h"


/*-*************************************
*  Binary Tree search
***************************************/

static void
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                const BYTE* ip, const BYTE* iend,
                U32 mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hashLog = cParams->hashLog;

    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;

    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    if (idx != target)
        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
                    idx, target, ms->window.dictLimit);
    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
    (void)iend;

    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
    for ( ; idx < target ; idx++) {
        size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
        U32    const matchIndex = hashTable[h];

        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
        U32*   const sortMarkPtr = nextCandidatePtr + 1;

        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
        hashTable[h] = idx;               /* Update Hash Table */
        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
    }
    ms->nextToUpdate = target;
}
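
/* note : reader aid, summarizing the scheme implemented below.
 * The chain table doubles as a "dual-use" binary tree (DUBT) :
 * each indexed position owns a pair of U32 slots at bt[2*(idx&btMask)],
 * which is why btLog == chainLog - 1.
 * ZSTD_updateDUBT() above only links new positions like a hash chain,
 * tagging slot[1] with ZSTD_DUBT_UNSORTED_MARK.
 * Actual sorting into a binary tree (slot[0] = smaller subtree,
 * slot[1] = larger subtree) is deferred to ZSTD_insertDUBT1(),
 * invoked lazily from ZSTD_DUBT_findBestMatch(). */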

/** ZSTD_insertDUBT1() :
 *  sort one already inserted but unsorted position
 *  assumption : current >= btLow == (current - btMask)
 *  doesn't fail */
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
                 U32 current, const BYTE* inputEnd,
                 U32 nbCompares, U32 btLow,
                 const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
    const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowValid = ms->window.lowLimit;
    U32 const maxDistance = 1U << cParams->windowLog;
    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;


    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                current, dictLimit, windowLow);
    assert(current >= btLow);
    assert(ip < iend);   /* condition for ZSTD_count */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < current);
        /* note : all candidates are now supposedly sorted,
         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */

        if ( (dictMode != ZSTD_extDict)
          || (matchIndex+matchLength >= dictLimit)  /* both in current segment */
          || (current < dictLimit) /* both in extDict */) {
            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
                                     || (matchIndex+matchLength >= dictLimit)) ?
                                        base : dictBase;
            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
                 || (current < dictLimit) );
            match = mBase + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
        }

        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
                    current, matchIndex, (U32)matchLength);

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
                        matchIndex, btLow, nextPtr[1]);
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
                        matchIndex, btLow, nextPtr[0]);
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
}
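
/* note : reader aid.
 * ZSTD_DUBT_findBetterDictMatch() extends a search into a separate
 * dictMatchState : the dictionary keeps its own, already sorted, binary
 * tree, and its indices are translated into the current window's index
 * space through dictIndexDelta before being returned as offsets. */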

static size_t
ZSTD_DUBT_findBetterDictMatch (
        ZSTD_matchState_t* ms,
        const BYTE* const ip, const BYTE* const iend,
        size_t* offsetPtr,
        size_t bestLength,
        U32 nbCompares,
        U32 const mls,
        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_matchState_t * const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
    const U32 * const dictHashTable = dms->hashTable;
    U32 const hashLog = dmsCParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 dictMatchIndex = dictHashTable[h];

    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    U32 const current = (U32)(ip-base);
    const BYTE* const dictBase = dms->window.base;
    const BYTE* const dictEnd = dms->window.nextSrc;
    U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
    U32 const dictLowLimit = dms->window.lowLimit;
    U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;

    U32* const dictBt = dms->chainTable;
    U32 const btLog  = dmsCParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;

    size_t commonLengthSmaller=0, commonLengthLarger=0;

    (void)dictMode;
    assert(dictMode == ZSTD_dictMatchState);

    while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match = dictBase + dictMatchIndex;
        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
        if (dictMatchIndex+matchLength >= dictHighLimit)
            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */

        if (matchLength > bestLength) {
            U32 matchIndex = dictMatchIndex + dictIndexDelta;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
                    current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            }
            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            dictMatchIndex = nextPtr[1];          /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthLarger = matchLength;
            dictMatchIndex = nextPtr[0];
        }
    }

    if (bestLength >= MINMATCH) {
        U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                    current, (U32)bestLength, (U32)*offsetPtr, mIndex);
    }
    return bestLength;
}
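
/* note : reader aid.
 * ZSTD_DUBT_findBestMatch() proceeds in three phases :
 * 1) walk the list of still-unsorted candidates, stacking them into a
 *    reversed chain through their unsortedMark slot ;
 * 2) batch-sort the stacked candidates into the tree via ZSTD_insertDUBT1() ;
 * 3) descend the now-sorted tree from the hash head to find the best match,
 *    re-inserting the current position along the way. */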

static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 const mls,
                        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32          matchIndex  = hashTable[h];

    const BYTE* const base = ms->window.base;
    U32    const current = (U32)(ip-base);
    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);

    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32    const btLow = (btMask >= current) ? 0 : current - btMask;
    U32    const unsortLimit = MAX(btLow, windowLow);

    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
    U32          nbCompares = 1U << cParams->searchLog;
    U32          nbCandidates = nbCompares;
    U32          previousCandidate = 0;

    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
    assert(ip <= iend-8);   /* required for h calculation */

    /* reach end of unsorted candidates list */
    while ( (matchIndex > unsortLimit)
         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
         && (nbCandidates > 1) ) {
        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
                    matchIndex);
        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
        previousCandidate = matchIndex;
        matchIndex = *nextCandidate;
        nextCandidate = bt + 2*(matchIndex&btMask);
        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
        nbCandidates --;
    }

    /* nullify last candidate if it's still unsorted
     * simplification, detrimental to compression ratio, beneficial for speed */
    if ( (matchIndex > unsortLimit)
      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
                    matchIndex);
        *nextCandidate = *unsortedMark = 0;
    }

    /* batch sort stacked candidates */
    matchIndex = previousCandidate;
    while (matchIndex) {  /* will end on matchIndex == 0 */
        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
        ZSTD_insertDUBT1(ms, matchIndex, iend,
                         nbCandidates, unsortLimit, dictMode);
        matchIndex = nextCandidateIdx;
        nbCandidates++;
    }

    /* find longest match */
    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
        const BYTE* const dictBase = ms->window.dictBase;
        const U32 dictLimit = ms->window.dictLimit;
        const BYTE* const dictEnd = dictBase + dictLimit;
        const BYTE* const prefixStart = base + dictLimit;
        U32* smallerPtr = bt + 2*(current&btMask);
        U32* largerPtr  = bt + 2*(current&btMask) + 1;
        U32 matchEndIdx = current + 8 + 1;
        U32 dummy32;   /* to be nullified at the end */
        size_t bestLength = 0;

        matchIndex  = hashTable[h];
        hashTable[h] = current;   /* Update Hash Table */

        while (nbCompares-- && (matchIndex > windowLow)) {
            U32* const nextPtr = bt + 2*(matchIndex & btMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match;

            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
                match = base + matchIndex;
                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
            } else {
                match = dictBase + matchIndex;
                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
                if (matchIndex+matchLength >= dictLimit)
                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
            }
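
            /* note (reader aid) : the test below trades match length against
             * offset cost : a longer match is only adopted when 4x its extra
             * length exceeds the extra offset bits, i.e. each additional byte
             * is valued at 4 offset bits (a ~16x larger offset).
             * For illustration : a match one byte longer is accepted only
             * while highbit32(newOffset) - highbit32(oldOffset) < 4. */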
            if (matchLength > bestLength) {
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                    if (dictMode == ZSTD_dictMatchState) {
                        nbCompares = 0; /* in addition to avoiding checking any
                                         * further in this loop, make sure we
                                         * skip checking in the dictionary. */
                    }
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (match[matchLength] < ip[matchLength]) {
                /* match is smaller than current */
                *smallerPtr = matchIndex;             /* update smaller idx */
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                *largerPtr = matchIndex;
                commonLengthLarger = matchLength;
                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                largerPtr = nextPtr;
                matchIndex = nextPtr[0];
        }   }

        *smallerPtr = *largerPtr = 0;

        if (dictMode == ZSTD_dictMatchState && nbCompares) {
            bestLength = ZSTD_DUBT_findBetterDictMatch(
                    ms, ip, iend,
                    offsetPtr, bestLength, nbCompares,
                    mls, dictMode);
        }

        assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
        if (bestLength >= MINMATCH) {
            U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                        current, (U32)bestLength, (U32)*offsetPtr, mIndex);
        }
        return bestLength;
    }
}


/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                      const BYTE* const ip, const BYTE* const iLimit,
                      size_t* offsetPtr,
                      const U32 mls /* template */,
                      const ZSTD_dictMode_e dictMode)
{
    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateDUBT(ms, ip, iLimit, mls);
    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
}
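
/* note : reader aid.
 * The _selectMLS() wrappers below dispatch on cParams.minMatch so that `mls`
 * is a compile-time constant in each instantiation of the force-inlined
 * search function. Values outside the supported 4-6 range are clamped :
 * minMatch 3 searches with mls 4, and minMatch 7 with mls 6. */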

static size_t
ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
                            const BYTE* ip, const BYTE* const iLimit,
                            size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}


static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}


static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}



/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]

/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndex_internal(
                        ZSTD_matchState_t* ms,
                        const ZSTD_compressionParameters* const cParams,
                        const BYTE* ip, U32 const mls)
{
    U32* const hashTable  = ms->hashTable;
    const U32 hashLog = cParams->hashLog;
    U32* const chainTable = ms->chainTable;
    const U32 chainMask = (1 << cParams->chainLog) - 1;
    const BYTE* const base = ms->window.base;
    const U32 target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    ms->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}

U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}
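
/* note : reader aid.
 * ZSTD_HcFindBestMatch_generic() walks a classic hash chain :
 * the hash table provides the most recent position with the same hash,
 * and NEXT_IN_CHAIN links to progressively older candidates.
 * Within the prefix, a single-byte test at position `ml` cheaply rejects
 * candidates that cannot beat the current best length before running
 * the full ZSTD_count(). */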

/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const chainTable = ms->chainTable;
    const U32 chainSize = (1 << cParams->chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowestValid = ms->window.lowLimit;
    const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
    const U32 isDictionary = (ms->loadedDictEnd != 0);
    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;

    /* HC4 match finder */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        size_t currentMl=0;
        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
            const BYTE* const match = base + matchIndex;
            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            const BYTE* const match = dictBase + matchIndex;
            assert(match+4 <= dictEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }
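
    /* note (reader aid) : when attached to a dictMatchState, the search
     * continues into the dictionary's own chain table, reusing the remaining
     * nbAttempts budget ; dictionary indices are translated back into the
     * current window's index space through dmsIndexDelta. */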
    if (dictMode == ZSTD_dictMatchState) {
        const ZSTD_matchState_t* const dms = ms->dictMatchState;
        const U32* const dmsChainTable = dms->chainTable;
        const U32 dmsChainSize  = (1 << dms->cParams.chainLog);
        const U32 dmsChainMask  = dmsChainSize - 1;
        const U32 dmsLowestIndex = dms->window.dictLimit;
        const BYTE* const dmsBase = dms->window.base;
        const BYTE* const dmsEnd  = dms->window.nextSrc;
        const U32 dmsSize = (U32)(dmsEnd - dmsBase);
        const U32 dmsIndexDelta = dictLimit - dmsSize;
        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;

        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];

        for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
            size_t currentMl=0;
            const BYTE* const match = dmsBase + matchIndex;
            assert(match+4 <= dmsEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
            }

            if (matchIndex <= dmsMinChain) break;
            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
        }
    }

    return ml;
}


FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}


static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}


FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}


/* *******************************
*  Common parser - lazy strategy
*********************************/
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
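
/* note : reader aid.
 * ZSTD_compressBlock_lazy_generic() implements the greedy/lazy parsers :
 * `depth` selects how far the parser looks ahead before committing a match
 * (0 = greedy, 1 = lazy, 2 = lazy2). At each look-ahead step, a match found
 * one position further must beat the previous one by a margin (the
 * gain1/gain2 tests below) accounting for the cost of a larger offset.
 * Offsets handled here are biased by ZSTD_REP_MOVE : searchers return
 * offset+ZSTD_REP_MOVE, and offset==0 denotes a repeat-offset match. */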

FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth,
                        ZSTD_dictMode_e const dictMode)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 prefixLowestIndex = ms->window.dictLimit;
    const BYTE* const prefixLowest = base + prefixLowestIndex;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
                                         : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
                                         : ZSTD_HcFindBestMatch_selectMLS);
    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.dictLimit : 0;
    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
                                     dms->window.base : NULL;
    const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
                                     dictBase + dictLowestIndex : NULL;
    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.nextSrc : NULL;
    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
                                     0;
    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
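
    /* note (reader aid) : rep offsets inherited from the previous block may
     * reach beyond the valid window. In noDict mode they are disabled
     * (set to 0) and stashed in savedOffset, to be restored when saving
     * rep history at the end of the block. */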
    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }
    if (dictMode == ZSTD_dictMatchState) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
        if (dictMode == ZSTD_dictMatchState) {
            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
            const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                                && repIndex < prefixLowestIndex) ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                if (depth==0) goto _storeSequence;
            }
        }
        if ( dictMode == ZSTD_noDict
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }
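
        /* note (reader aid) : in the look-ahead below, a candidate at ip+1
         * replaces the current choice only if gain2 > gain1, where each
         * match byte is worth 4 "bits" and the offset costs its log2 ;
         * the trailing +4 (then +7 at depth 2) is a hysteresis favoring the
         * already-found match. For illustration : with matchLength=5 at
         * offset=64, gain1 = 5*4 - highbit32(65) + 4 = 18 ; a 6-byte match
         * at ip+1 wins only while highbit32(offset2+1) < 6, i.e. offset2 < 63.
         * Repeat-offset candidates cost no offset bits (gain2 = mlRep * 3). */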
        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ( (dictMode == ZSTD_noDict)
              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= 4) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            if (dictMode == ZSTD_dictMatchState) {
                const U32 repIndex = (U32)(ip - base) - offset_1;
                const BYTE* repMatch = repIndex < prefixLowestIndex ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                    int const gain2 = (int)(mlRep * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
            }
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ( (dictMode == ZSTD_noDict)
                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                    int const gain2 = (int)(mlRep * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
                if (dictMode == ZSTD_dictMatchState) {
                    const U32 repIndex = (U32)(ip - base) - offset_1;
                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                        int const gain2 = (int)(mlRep * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((mlRep >= 4) && (gain2 > gain1))
                            matchLength = mlRep, offset = 0, start = ip;
                    }
                }
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* NOTE:
         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
         * overflows the pointer, which is undefined behavior.
         */
        /* catch up */
        if (offset) {
            if (dictMode == ZSTD_noDict) {
                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
                    { start--; matchLength++; }
            }
            if (dictMode == ZSTD_dictMatchState) {
                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }
        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        if (dictMode == ZSTD_dictMatchState) {
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex = current2 - offset_2;
                const BYTE* repMatch = dictMode == ZSTD_dictMatchState
                    && repIndex < prefixLowestIndex ?
                        dictBase - dictIndexDelta + repIndex :
                        base + repIndex;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                    ip += matchLength;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }

        if (dictMode == ZSTD_noDict) {
            while ( ((ip <= ilimit) & (offset_2>0))
                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                /* store sequence */
                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap repcodes */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }
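
    /* note (reader aid) : a rep offset that was disabled at block entry
     * (offset_1 or offset_2 == 0) must not leak a zero into the next block's
     * rep history ; savedOffset restores the original value instead. */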
    /* Save reps for next block */
    rep[0] = offset_1 ? offset_1 : savedOffset;
    rep[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}


size_t ZSTD_compressBlock_btlazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}

size_t ZSTD_compressBlock_greedy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btlazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_greedy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
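
/* note : reader aid.
 * The _extDict variant below handles a window split across two segments :
 * indices below dictLimit live in the old buffer (dictBase), indices at or
 * above it in the current one (base). Matches may start in the old segment
 * and continue into the prefix, hence ZSTD_count_2segments(), and every
 * rep-offset test must first check which segment repIndex falls into. */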

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
    const U32 windowLog = ms->cParams.windowLog;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = rep[0], offset_2 = rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");

    /* init */
    ip += (ip == prefixStart);

    /* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
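
    /* note (reader aid) : the recurring test
     * ((U32)((dictLimit-1) - repIndex) >= 3) relies on unsigned wraparound :
     * it rejects repIndex values in [dictLimit-3, dictLimit-1], whose 4-byte
     * read would straddle the boundary between the two segments, while
     * accepting all prefix positions (repIndex >= dictLimit wraps to a huge
     * value). The companion `offset < current - windowLow` test is, as the
     * code comments note, equivalent to `current > repIndex >= windowLow`. */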
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 current = (U32)(ip-base);

        /* check repCode */
        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
            const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
               & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            current++;
            /* check repCode */
            if (offset) {
                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
                   & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= 4) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }

            /* search match, depth 1 */
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                current++;
                /* check repCode */
                if (offset) {
                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
                       & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                        int const gain2 = (int)(repLength * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= 4) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }   /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }
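
        /* note (reader aid) : after each stored sequence, offset_2 is probed
         * at the new position ; an immediate repcode is stored with offCode 0
         * and zero literals, and the two rep offsets are swapped. */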
        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repCurrent = (U32)(ip-base);
            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
            const U32 repIndex = repCurrent - offset_2;
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
               & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}


size_t ZSTD_compressBlock_greedy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}

size_t ZSTD_compressBlock_lazy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}

size_t ZSTD_compressBlock_lazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}

size_t ZSTD_compressBlock_btlazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}