/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_lazy.h"


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 *  ip : assumed <= iend-8 .
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
                const BYTE* const ip, const BYTE* const iend,
                U32 nbCompares, U32 const mls, U32 const extDict)
{
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->appliedParams.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = zc->lowLimit;
    U32 matchEndIdx = current+8+1;
    size_t bestLength = 8;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < current);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog is small (<= 11) */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if smaller or larger */
            break;   /* drop, to guarantee consistency ; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {   /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    assert(matchEndIdx > current + 8);
    return matchEndIdx - (current + 8);
}
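
/* Layout sketch (illustrative) : the binary tree reuses the chain table
 * as a rolling buffer, with two link slots per indexed position :
 *
 *     U32* const row = bt + 2*(pos & btMask);
 *     row[0] : subtree of earlier positions whose suffixes sort below pos
 *     row[1] : subtree of earlier positions whose suffixes sort above pos
 *
 * During descent, commonLengthSmaller / commonLengthLarger record the prefix
 * already known to be shared with each side, so every comparison resumes
 * where the previous one stopped instead of re-reading from the first byte.
 */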

FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(ZSTD_CCtx* zc,
                const BYTE* const ip, const BYTE* const iend,
                const U32 nbCompares, const U32 mls, const U32 extDict)
{
    const BYTE* const base = zc->base;
    U32 const target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
                idx, target, extDict);

    while(idx < target)
        idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict);
    zc->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_CCtx* zc,
                const BYTE* const ip, const BYTE* const iend,
                const U32 nbCompares, const U32 mls)
{
    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/);
}

void ZSTD_updateTree_extDict(ZSTD_CCtx* zc,
                const BYTE* const ip, const BYTE* const iend,
                const U32 nbCompares, const U32 mls)
{
    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/);
}
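
/* Typical catch-up pattern (illustrative ; this is how the Bt match finders
 * below drive these helpers) :
 *
 *     ZSTD_updateTree(zc, ip, iend, maxNbAttempts, mls);
 *     ml = ZSTD_insertBtAndFindBestMatch(zc, ip, iend, &offset, maxNbAttempts, mls, 0);
 *
 * ZSTD_insertBt1() reports more than one position covered after finding a
 * long match, so the update loop jumps over highly repetitive areas instead
 * of inserting every single position.
 */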

static size_t ZSTD_insertBtAndFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 nbCompares, const U32 mls,
                        U32 extDict)
{
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->appliedParams.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex  = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8+1;
    U32 dummy32;   /* to be nullified at the end */
    size_t bestLength = 0;

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            if (ip+matchLength == iend) {   /* equal : no way to know if smaller or larger */
                break;   /* drop, to guarantee consistency (misses a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(matchEndIdx > current+8);
    zc->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
    return bestLength;
}


/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
{
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
}


static size_t ZSTD_BtFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}


/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
{
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
}


static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}
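
/* Worked example of the selection rule in ZSTD_insertBtAndFindBestMatch()
 * (illustrative numbers) : a longer candidate replaces the current best only if
 *
 *     4 * (matchLength - bestLength) > highbit(newDistance+1) - highbit(prevOffsetCode+1)
 *
 * i.e. each extra matched byte must pay for ~4 bits of extra offset cost.
 * Say bestLength = 8 was stored with an offset costing ~6 bits ; a candidate
 * of length 9 at distance 65536 (~16 bits) yields 4*1 = 4, which does not
 * beat 16-6 = 10, so the shorter but much closer match is kept.
 */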


/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]

/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
{
    U32* const hashTable  = zc->hashTable;
    const U32 hashLog = zc->appliedParams.cParams.hashLog;
    U32* const chainTable = zc->chainTable;
    const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target) {   /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    zc->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}


/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls, const U32 extDict)
{
    U32* const chainTable = zc->chainTable;
    const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 lowLimit = zc->lowLimit;
    const U32 current = (U32)(ip-base);
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    int nbAttempts=maxNbAttempts;
    size_t ml=4-1;

    /* HC4 match finder */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        size_t currentMl=0;
        if ((!extDict) || matchIndex >= dictLimit) {
            const BYTE* const match = base + matchIndex;
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            const BYTE* const match = dictBase + matchIndex;
            assert(match+4 <= dictEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break;   /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    return ml;
}
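
/* Chain layout sketch (illustrative) : chainTable forms singly-linked lists
 * threaded through position indices, one list per hash bucket :
 *
 *     U32 idx = hashTable[h];                  -- most recent position for bucket h
 *     idx = NEXT_IN_CHAIN(idx, chainMask);     -- previous position, same bucket
 *
 * The walk above stops after maxNbAttempts links, or once idx falls at or
 * below minChain (older entries have been overwritten in the rolling table)
 * or lowLimit (outside the valid window).
 */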

FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
    }
}


FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_CCtx* const zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* const offsetPtr,
                        U32 const maxNbAttempts, U32 const matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
    }
}
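
/* The switches above emulate C++ templates : mls and extDict reach
 * ZSTD_HcFindBestMatch_generic() as compile-time constants, so each
 * force-inlined instantiation is specialized for one (mls, extDict)
 * pair and the per-candidate branches on these values vanish.
 */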


/* *******************************
*  Common parser - lazy strategy
*********************************/
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                                       const void* src, size_t srcSize,
                                       const U32 searchMethod, const U32 depth)
{
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base + ctx->dictLimit;

    U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
    U32 const mls = ctx->appliedParams.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;

    /* init */
    ip += (ip==base);
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    {   U32 const maxRep = (U32)(ip-base);
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
            /* repcode : we take it */
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= 4) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                    int const gain2 = (int)(ml2 * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((ml2 >= 4) && (gain2 > gain1))
                        matchLength = ml2, offset = 0, start = ip;
                }
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* NOTE:
         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior :
         * (-offset+ZSTD_REP_MOVE-1) is unsigned, so adding it to start
         * overflows the pointer, which is undefined behavior.
         */
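        /* The catch-up step below extends the chosen match backwards, trading
         * literals for extra matched bytes : while the byte before start equals
         * the byte before the match source, the match grows and the literal
         * run shrinks. The bound checks keep both pointers within the current
         * prefix, since the offset is only meaningful there.
         */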
        /* catch up */
        if (offset) {
            while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
                 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )   /* only search for offset within prefix */
                { start--; matchLength++; }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }
        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while ( ((ip <= ilimit) & (offset_2>0))
             && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
            /* store sequence */
            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap repcodes */
            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
            ip += matchLength;
            anchor = ip;
            continue;   /* faster when present ... (?) */
    }   }

    /* Save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
    return iend - anchor;
}


size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
}

size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
}

size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
}

size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
}
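
/* Entry points map to (searchMethod, depth) as follows :
 *   ZSTD_compressBlock_greedy  : Hash Chain, depth 0 (take the first acceptable match)
 *   ZSTD_compressBlock_lazy    : Hash Chain, depth 1 (re-evaluate one position ahead)
 *   ZSTD_compressBlock_lazy2   : Hash Chain, depth 2
 *   ZSTD_compressBlock_btlazy2 : Binary Tree, depth 2
 */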


FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                                const void* src, size_t srcSize,
                                const U32 searchMethod, const U32 depth)
{
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const U32 dictLimit = ctx->dictLimit;
    const U32 lowestIndex = ctx->lowLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ctx->lowLimit;

    const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
    const U32 mls = ctx->appliedParams.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ip += (ip == prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 current = (U32)(ip-base);

        /* check repCode */
        {   const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected : we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            current++;
            /* check repCode */
            if (offset) {
                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= 4) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }

            /* search match, depth 1 */
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                current++;
                /* check repCode */
                if (offset) {
                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                        int const gain2 = (int)(repLength * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= 4) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }   /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repIndex = (U32)((ip-base) - offset_2);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected : we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
}
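
/* Note on the repcode validity test used throughout the function above :
 *     ((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)
 * relies on intentional unsigned underflow. It accepts a repIndex either
 * fully inside the extDict segment with 4 readable bytes
 * (repIndex <= dictLimit-4), or anywhere in the current prefix
 * (repIndex >= dictLimit, where the subtraction wraps to a huge value),
 * while rejecting the 3 positions straddling the segment boundary,
 * where a 4-byte read would run past dictEnd.
 */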

size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
}

size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
}

size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
}

size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
}