1 /* 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #include "zstd_lazy.h" 12 13 14 /*-************************************* 15 * Binary Tree search 16 ***************************************/ 17 /** ZSTD_insertBt1() : add one or multiple positions to tree. 18 * ip : assumed <= iend-8 . 19 * @return : nb of positions added */ 20 static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, 21 U32 extDict) 22 { 23 U32* const hashTable = zc->hashTable; 24 U32 const hashLog = zc->appliedParams.cParams.hashLog; 25 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 26 U32* const bt = zc->chainTable; 27 U32 const btLog = zc->appliedParams.cParams.chainLog - 1; 28 U32 const btMask = (1 << btLog) - 1; 29 U32 matchIndex = hashTable[h]; 30 size_t commonLengthSmaller=0, commonLengthLarger=0; 31 const BYTE* const base = zc->base; 32 const BYTE* const dictBase = zc->dictBase; 33 const U32 dictLimit = zc->dictLimit; 34 const BYTE* const dictEnd = dictBase + dictLimit; 35 const BYTE* const prefixStart = base + dictLimit; 36 const BYTE* match; 37 const U32 current = (U32)(ip-base); 38 const U32 btLow = btMask >= current ? 0 : current - btMask; 39 U32* smallerPtr = bt + 2*(current&btMask); 40 U32* largerPtr = smallerPtr + 1; 41 U32 dummy32; /* to be nullified at the end */ 42 U32 const windowLow = zc->lowLimit; 43 U32 matchEndIdx = current+8; 44 size_t bestLength = 8; 45 #ifdef ZSTD_C_PREDICT 46 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); 47 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); 48 predictedSmall += (predictedSmall>0); 49 predictedLarge += (predictedLarge>0); 50 #endif /* ZSTD_C_PREDICT */ 51 52 assert(ip <= iend-8); /* required for h calculation */ 53 hashTable[h] = current; /* Update Hash Table */ 54 55 while (nbCompares-- && (matchIndex > windowLow)) { 56 U32* const nextPtr = bt + 2*(matchIndex & btMask); 57 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 58 59 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ 60 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ 61 if (matchIndex == predictedSmall) { 62 /* no need to check length, result known */ 63 *smallerPtr = matchIndex; 64 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 65 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 66 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 67 predictedSmall = predictPtr[1] + (predictPtr[1]>0); 68 continue; 69 } 70 if (matchIndex == predictedLarge) { 71 *largerPtr = matchIndex; 72 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 73 largerPtr = nextPtr; 74 matchIndex = nextPtr[0]; 75 predictedLarge = predictPtr[0] + (predictPtr[0]>0); 76 continue; 77 } 78 #endif 79 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { 80 match = base + matchIndex; 81 if (match[matchLength] == ip[matchLength]) 82 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; 83 } else { 84 match = dictBase + matchIndex; 85 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 86 if (matchIndex+matchLength >= dictLimit) 87 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ 88 } 89 90 if (matchLength > bestLength) { 91 bestLength = matchLength; 92 if (matchLength > matchEndIdx - matchIndex) 93 matchEndIdx = matchIndex + (U32)matchLength; 94 } 95 96 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ 97 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ 98 99 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ 100 /* match+1 is smaller than current */ 101 *smallerPtr = matchIndex; /* update smaller idx */ 102 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 103 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 104 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 105 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 106 } else { 107 /* match is larger than current */ 108 *largerPtr = matchIndex; 109 commonLengthLarger = matchLength; 110 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 111 largerPtr = nextPtr; 112 matchIndex = nextPtr[0]; 113 } } 114 115 *smallerPtr = *largerPtr = 0; 116 if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ 117 if (matchEndIdx > current + 8) return matchEndIdx - (current + 8); 118 return 1; 119 } 120 121 122 static size_t ZSTD_insertBtAndFindBestMatch ( 123 ZSTD_CCtx* zc, 124 const BYTE* const ip, const BYTE* const iend, 125 size_t* offsetPtr, 126 U32 nbCompares, const U32 mls, 127 U32 extDict) 128 { 129 U32* const hashTable = zc->hashTable; 130 U32 const hashLog = zc->appliedParams.cParams.hashLog; 131 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 132 U32* const bt = zc->chainTable; 133 U32 const btLog = zc->appliedParams.cParams.chainLog - 1; 134 U32 const btMask = (1 << btLog) - 1; 135 U32 matchIndex = hashTable[h]; 136 size_t commonLengthSmaller=0, commonLengthLarger=0; 137 const BYTE* const base = zc->base; 138 const BYTE* const dictBase = zc->dictBase; 139 const U32 dictLimit = zc->dictLimit; 140 const BYTE* const dictEnd = dictBase + dictLimit; 141 const BYTE* const prefixStart = base + dictLimit; 142 const U32 current = (U32)(ip-base); 143 const U32 btLow = btMask >= current ? 0 : current - btMask; 144 const U32 windowLow = zc->lowLimit; 145 U32* smallerPtr = bt + 2*(current&btMask); 146 U32* largerPtr = bt + 2*(current&btMask) + 1; 147 U32 matchEndIdx = current+8; 148 U32 dummy32; /* to be nullified at the end */ 149 size_t bestLength = 0; 150 151 assert(ip <= iend-8); /* required for h calculation */ 152 hashTable[h] = current; /* Update Hash Table */ 153 154 while (nbCompares-- && (matchIndex > windowLow)) { 155 U32* const nextPtr = bt + 2*(matchIndex & btMask); 156 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 157 const BYTE* match; 158 159 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { 160 match = base + matchIndex; 161 if (match[matchLength] == ip[matchLength]) 162 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; 163 } else { 164 match = dictBase + matchIndex; 165 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 166 if (matchIndex+matchLength >= dictLimit) 167 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ 168 } 169 170 if (matchLength > bestLength) { 171 if (matchLength > matchEndIdx - matchIndex) 172 matchEndIdx = matchIndex + (U32)matchLength; 173 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) 174 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; 175 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ 176 break; /* drop, to guarantee consistency (miss a little bit of compression) */ 177 } 178 179 if (match[matchLength] < ip[matchLength]) { 180 /* match is smaller than current */ 181 *smallerPtr = matchIndex; /* update smaller idx */ 182 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 183 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 184 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 185 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 186 } else { 187 /* match is larger than current */ 188 *largerPtr = matchIndex; 189 commonLengthLarger = matchLength; 190 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 191 largerPtr = nextPtr; 192 matchIndex = nextPtr[0]; 193 } } 194 195 *smallerPtr = *largerPtr = 0; 196 197 zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; 198 return bestLength; 199 } 200 201 202 void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) 203 { 204 const BYTE* const base = zc->base; 205 const U32 target = (U32)(ip - base); 206 U32 idx = zc->nextToUpdate; 207 208 while(idx < target) 209 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); 210 } 211 212 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ 213 static size_t ZSTD_BtFindBestMatch ( 214 ZSTD_CCtx* zc, 215 const BYTE* const ip, const BYTE* const iLimit, 216 size_t* offsetPtr, 217 const U32 maxNbAttempts, const U32 mls) 218 { 219 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ 220 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); 221 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); 222 } 223 224 225 static size_t ZSTD_BtFindBestMatch_selectMLS ( 226 ZSTD_CCtx* zc, /* Index table will be updated */ 227 const BYTE* ip, const BYTE* const iLimit, 228 size_t* offsetPtr, 229 const U32 maxNbAttempts, const U32 matchLengthSearch) 230 { 231 switch(matchLengthSearch) 232 { 233 default : /* includes case 3 */ 234 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); 235 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); 236 case 7 : 237 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); 238 } 239 } 240 241 242 void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) 243 { 244 const BYTE* const base = zc->base; 245 const U32 target = (U32)(ip - base); 246 U32 idx = zc->nextToUpdate; 247 248 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); 249 } 250 251 252 /** Tree updater, providing best match */ 253 static size_t ZSTD_BtFindBestMatch_extDict ( 254 ZSTD_CCtx* zc, 255 const BYTE* const ip, const BYTE* const iLimit, 256 size_t* offsetPtr, 257 const U32 maxNbAttempts, const U32 mls) 258 { 259 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ 260 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); 261 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); 262 } 263 264 265 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( 266 ZSTD_CCtx* zc, /* Index table will be updated */ 267 const BYTE* ip, const BYTE* const iLimit, 268 size_t* offsetPtr, 269 const U32 maxNbAttempts, const U32 matchLengthSearch) 270 { 271 switch(matchLengthSearch) 272 { 273 default : /* includes case 3 */ 274 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); 275 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); 276 case 7 : 277 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); 278 } 279 } 280 281 282 283 /* ********************************* 284 * Hash Chain 285 ***********************************/ 286 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] 287 288 /* Update chains up to ip (excluded) 289 Assumption : always within prefix (i.e. not within extDict) */ 290 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) 291 { 292 U32* const hashTable = zc->hashTable; 293 const U32 hashLog = zc->appliedParams.cParams.hashLog; 294 U32* const chainTable = zc->chainTable; 295 const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1; 296 const BYTE* const base = zc->base; 297 const U32 target = (U32)(ip - base); 298 U32 idx = zc->nextToUpdate; 299 300 while(idx < target) { /* catch up */ 301 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); 302 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; 303 hashTable[h] = idx; 304 idx++; 305 } 306 307 zc->nextToUpdate = target; 308 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; 309 } 310 311 312 /* inlining is important to hardwire a hot branch (template emulation) */ 313 FORCE_INLINE_TEMPLATE 314 size_t ZSTD_HcFindBestMatch_generic ( 315 ZSTD_CCtx* zc, /* Index table will be updated */ 316 const BYTE* const ip, const BYTE* const iLimit, 317 size_t* offsetPtr, 318 const U32 maxNbAttempts, const U32 mls, const U32 extDict) 319 { 320 U32* const chainTable = zc->chainTable; 321 const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog); 322 const U32 chainMask = chainSize-1; 323 const BYTE* const base = zc->base; 324 const BYTE* const dictBase = zc->dictBase; 325 const U32 dictLimit = zc->dictLimit; 326 const BYTE* const prefixStart = base + dictLimit; 327 const BYTE* const dictEnd = dictBase + dictLimit; 328 const U32 lowLimit = zc->lowLimit; 329 const U32 current = (U32)(ip-base); 330 const U32 minChain = current > chainSize ? current - chainSize : 0; 331 int nbAttempts=maxNbAttempts; 332 size_t ml=4-1; 333 334 /* HC4 match finder */ 335 U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); 336 337 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { 338 const BYTE* match; 339 size_t currentMl=0; 340 if ((!extDict) || matchIndex >= dictLimit) { 341 match = base + matchIndex; 342 if (match[ml] == ip[ml]) /* potentially better */ 343 currentMl = ZSTD_count(ip, match, iLimit); 344 } else { 345 match = dictBase + matchIndex; 346 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 347 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; 348 } 349 350 /* save best solution */ 351 if (currentMl > ml) { 352 ml = currentMl; 353 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; 354 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 355 } 356 357 if (matchIndex <= minChain) break; 358 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); 359 } 360 361 return ml; 362 } 363 364 365 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( 366 ZSTD_CCtx* zc, 367 const BYTE* ip, const BYTE* const iLimit, 368 size_t* offsetPtr, 369 const U32 maxNbAttempts, const U32 matchLengthSearch) 370 { 371 switch(matchLengthSearch) 372 { 373 default : /* includes case 3 */ 374 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); 375 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); 376 case 7 : 377 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); 378 } 379 } 380 381 382 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( 383 ZSTD_CCtx* zc, 384 const BYTE* ip, const BYTE* const iLimit, 385 size_t* offsetPtr, 386 const U32 maxNbAttempts, const U32 matchLengthSearch) 387 { 388 switch(matchLengthSearch) 389 { 390 default : /* includes case 3 */ 391 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); 392 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); 393 case 7 : 394 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); 395 } 396 } 397 398 399 /* ******************************* 400 * Common parser - lazy strategy 401 *********************************/ 402 FORCE_INLINE_TEMPLATE 403 size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, 404 const void* src, size_t srcSize, 405 const U32 searchMethod, const U32 depth) 406 { 407 seqStore_t* seqStorePtr = &(ctx->seqStore); 408 const BYTE* const istart = (const BYTE*)src; 409 const BYTE* ip = istart; 410 const BYTE* anchor = istart; 411 const BYTE* const iend = istart + srcSize; 412 const BYTE* const ilimit = iend - 8; 413 const BYTE* const base = ctx->base + ctx->dictLimit; 414 415 U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog; 416 U32 const mls = ctx->appliedParams.cParams.searchLength; 417 418 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, 419 size_t* offsetPtr, 420 U32 maxNbAttempts, U32 matchLengthSearch); 421 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; 422 U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0; 423 424 /* init */ 425 ip += (ip==base); 426 ctx->nextToUpdate3 = ctx->nextToUpdate; 427 { U32 const maxRep = (U32)(ip-base); 428 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; 429 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; 430 } 431 432 /* Match Loop */ 433 while (ip < ilimit) { 434 size_t matchLength=0; 435 size_t offset=0; 436 const BYTE* start=ip+1; 437 438 /* check repCode */ 439 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { 440 /* repcode : we take it */ 441 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 442 if (depth==0) goto _storeSequence; 443 } 444 445 /* first search (depth 0) */ 446 { size_t offsetFound = 99999999; 447 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); 448 if (ml2 > matchLength) 449 matchLength = ml2, start = ip, offset=offsetFound; 450 } 451 452 if (matchLength < 4) { 453 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ 454 continue; 455 } 456 457 /* let's try to find a better solution */ 458 if (depth>=1) 459 while (ip<ilimit) { 460 ip ++; 461 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 462 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 463 int const gain2 = (int)(mlRep * 3); 464 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 465 if ((mlRep >= 4) && (gain2 > gain1)) 466 matchLength = mlRep, offset = 0, start = ip; 467 } 468 { size_t offset2=99999999; 469 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); 470 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 471 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 472 if ((ml2 >= 4) && (gain2 > gain1)) { 473 matchLength = ml2, offset = offset2, start = ip; 474 continue; /* search a better one */ 475 } } 476 477 /* let's find an even better one */ 478 if ((depth==2) && (ip<ilimit)) { 479 ip ++; 480 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 481 size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 482 int const gain2 = (int)(ml2 * 4); 483 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 484 if ((ml2 >= 4) && (gain2 > gain1)) 485 matchLength = ml2, offset = 0, start = ip; 486 } 487 { size_t offset2=99999999; 488 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); 489 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 490 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 491 if ((ml2 >= 4) && (gain2 > gain1)) { 492 matchLength = ml2, offset = offset2, start = ip; 493 continue; 494 } } } 495 break; /* nothing found : store previous solution */ 496 } 497 498 /* NOTE: 499 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. 500 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which 501 * overflows the pointer, which is undefined behavior. 502 */ 503 /* catch up */ 504 if (offset) { 505 while ( (start > anchor) 506 && (start > base+offset-ZSTD_REP_MOVE) 507 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ 508 { start--; matchLength++; } 509 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 510 } 511 /* store sequence */ 512 _storeSequence: 513 { size_t const litLength = start - anchor; 514 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); 515 anchor = ip = start + matchLength; 516 } 517 518 /* check immediate repcode */ 519 while ( (ip <= ilimit) 520 && ((offset_2>0) 521 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { 522 /* store sequence */ 523 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 524 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ 525 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); 526 ip += matchLength; 527 anchor = ip; 528 continue; /* faster when present ... (?) */ 529 } } 530 531 /* Save reps for next block */ 532 seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; 533 seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; 534 535 /* Return the last literals size */ 536 return iend - anchor; 537 } 538 539 540 size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 541 { 542 return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); 543 } 544 545 size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 546 { 547 return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); 548 } 549 550 size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 551 { 552 return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); 553 } 554 555 size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 556 { 557 return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); 558 } 559 560 561 FORCE_INLINE_TEMPLATE 562 size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, 563 const void* src, size_t srcSize, 564 const U32 searchMethod, const U32 depth) 565 { 566 seqStore_t* seqStorePtr = &(ctx->seqStore); 567 const BYTE* const istart = (const BYTE*)src; 568 const BYTE* ip = istart; 569 const BYTE* anchor = istart; 570 const BYTE* const iend = istart + srcSize; 571 const BYTE* const ilimit = iend - 8; 572 const BYTE* const base = ctx->base; 573 const U32 dictLimit = ctx->dictLimit; 574 const U32 lowestIndex = ctx->lowLimit; 575 const BYTE* const prefixStart = base + dictLimit; 576 const BYTE* const dictBase = ctx->dictBase; 577 const BYTE* const dictEnd = dictBase + dictLimit; 578 const BYTE* const dictStart = dictBase + ctx->lowLimit; 579 580 const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog; 581 const U32 mls = ctx->appliedParams.cParams.searchLength; 582 583 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, 584 size_t* offsetPtr, 585 U32 maxNbAttempts, U32 matchLengthSearch); 586 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; 587 588 U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1]; 589 590 /* init */ 591 ctx->nextToUpdate3 = ctx->nextToUpdate; 592 ip += (ip == prefixStart); 593 594 /* Match Loop */ 595 while (ip < ilimit) { 596 size_t matchLength=0; 597 size_t offset=0; 598 const BYTE* start=ip+1; 599 U32 current = (U32)(ip-base); 600 601 /* check repCode */ 602 { const U32 repIndex = (U32)(current+1 - offset_1); 603 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 604 const BYTE* const repMatch = repBase + repIndex; 605 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ 606 if (MEM_read32(ip+1) == MEM_read32(repMatch)) { 607 /* repcode detected we should take it */ 608 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 609 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; 610 if (depth==0) goto _storeSequence; 611 } } 612 613 /* first search (depth 0) */ 614 { size_t offsetFound = 99999999; 615 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); 616 if (ml2 > matchLength) 617 matchLength = ml2, start = ip, offset=offsetFound; 618 } 619 620 if (matchLength < 4) { 621 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ 622 continue; 623 } 624 625 /* let's try to find a better solution */ 626 if (depth>=1) 627 while (ip<ilimit) { 628 ip ++; 629 current++; 630 /* check repCode */ 631 if (offset) { 632 const U32 repIndex = (U32)(current - offset_1); 633 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 634 const BYTE* const repMatch = repBase + repIndex; 635 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ 636 if (MEM_read32(ip) == MEM_read32(repMatch)) { 637 /* repcode detected */ 638 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 639 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 640 int const gain2 = (int)(repLength * 3); 641 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 642 if ((repLength >= 4) && (gain2 > gain1)) 643 matchLength = repLength, offset = 0, start = ip; 644 } } 645 646 /* search match, depth 1 */ 647 { size_t offset2=99999999; 648 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); 649 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 650 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 651 if ((ml2 >= 4) && (gain2 > gain1)) { 652 matchLength = ml2, offset = offset2, start = ip; 653 continue; /* search a better one */ 654 } } 655 656 /* let's find an even better one */ 657 if ((depth==2) && (ip<ilimit)) { 658 ip ++; 659 current++; 660 /* check repCode */ 661 if (offset) { 662 const U32 repIndex = (U32)(current - offset_1); 663 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 664 const BYTE* const repMatch = repBase + repIndex; 665 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ 666 if (MEM_read32(ip) == MEM_read32(repMatch)) { 667 /* repcode detected */ 668 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 669 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 670 int const gain2 = (int)(repLength * 4); 671 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 672 if ((repLength >= 4) && (gain2 > gain1)) 673 matchLength = repLength, offset = 0, start = ip; 674 } } 675 676 /* search match, depth 2 */ 677 { size_t offset2=99999999; 678 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); 679 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 680 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 681 if ((ml2 >= 4) && (gain2 > gain1)) { 682 matchLength = ml2, offset = offset2, start = ip; 683 continue; 684 } } } 685 break; /* nothing found : store previous solution */ 686 } 687 688 /* catch up */ 689 if (offset) { 690 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); 691 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; 692 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; 693 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 694 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 695 } 696 697 /* store sequence */ 698 _storeSequence: 699 { size_t const litLength = start - anchor; 700 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); 701 anchor = ip = start + matchLength; 702 } 703 704 /* check immediate repcode */ 705 while (ip <= ilimit) { 706 const U32 repIndex = (U32)((ip-base) - offset_2); 707 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 708 const BYTE* const repMatch = repBase + repIndex; 709 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ 710 if (MEM_read32(ip) == MEM_read32(repMatch)) { 711 /* repcode detected we should take it */ 712 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 713 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 714 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ 715 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); 716 ip += matchLength; 717 anchor = ip; 718 continue; /* faster when present ... (?) */ 719 } 720 break; 721 } } 722 723 /* Save reps for next block */ 724 seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; 725 726 /* Return the last literals size */ 727 return iend - anchor; 728 } 729 730 731 size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 732 { 733 return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); 734 } 735 736 size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 737 { 738 return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); 739 } 740 741 size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 742 { 743 return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); 744 } 745 746 size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) 747 { 748 return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); 749 } 750