/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"


void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hBits = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash table.
     * Insert the other positions if their hash entry is empty.
     */
    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
        hashTable[hash0] = curr;
        if (dtlm == ZSTD_dtlm_fast) continue;
        /* Only load extra positions for ZSTD_dtlm_full */
        {   U32 p;
            for (p = 1; p < fastHashFillStep; ++p) {
                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                if (hashTable[hash] == 0) {  /* not yet filled */
                    hashTable[hash] = curr + p;
    }   }   }   }
}


/**
 * If you squint hard enough (and ignore repcodes), the search operation at any
 * given position is broken into 4 stages:
 *
 * 1. Hash   (map position to hash value via input read)
 * 2. Lookup (map hash val to index via hashtable read)
 * 3. Load   (map index to value at that position via input read)
 * 4. Compare
 *
 * Each of these steps involves a memory read at an address which is computed
 * from the previous step. This means these steps must be sequenced and their
 * latencies are cumulative.
 *
 * Rather than do 1->2->3->4 sequentially for a single position before moving
 * onto the next, this implementation interleaves these operations across the
 * next few positions:
 *
 * R = Repcode Read & Compare
 * H = Hash
 * T = Table Lookup
 * M = Match Read & Compare
 *
 * Pos | Time -->
 * ----+-------------------
 * N   | ... M
 * N+1 | ...   TM
 * N+2 |       R H   T M
 * N+3 |         H       TM
 * N+4 |             R H     T M
 * N+5 |               H       ...
 * N+6 |             R ...
 *
 * This is very much analogous to the pipelining of execution in a CPU. And just
 * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
 * branch).
 *
 * When this happens, we throw away our current state, and do the following prep
 * to re-enter the loop:
 *
 * Pos | Time -->
 * ----+-------------------
 * N   | H T
 * N+1 |   H
 *
 * This is also the work we do at the beginning to enter the loop initially.
 */
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls, U32 const hasStep)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;

    const BYTE* anchor = istart;
    const BYTE* ip0 = istart;
    const BYTE* ip1;
    const BYTE* ip2;
    const BYTE* ip3;
    U32 current0;

    U32 rep_offset1 = rep[0];
    U32 rep_offset2 = rep[1];
    U32 offsetSaved = 0;

    size_t hash0; /* hash for ip0 */
    size_t hash1; /* hash for ip1 */
    U32 idx; /* match idx for ip0 */
    U32 mval; /* src value at match idx */

    U32 offcode;
    const BYTE* match0;
    size_t mLength;

    /* ip0 and ip1 are always adjacent. The targetLength skipping and
     * uncompressibility acceleration is applied to every other position,
     * matching the behavior of #1562. step therefore represents the gap
     * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
    size_t step;
    const BYTE* nextStep;
    const size_t kStepIncr = (1 << (kSearchStrength - 1));

    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
    ip0 += (ip0 == prefixStart);
    {   U32 const curr = (U32)(ip0 - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
        U32 const maxRep = curr - windowLow;
        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
    }

    /* start each op */
_start: /* Requires: ip0 */

    step = stepSize;
    nextStep = ip0 + kStepIncr;

    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
    ip1 = ip0 + 1;
    ip2 = ip0 + step;
    ip3 = ip2 + 1;

    if (ip3 >= ilimit) {
        goto _cleanup;
    }

    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
    hash1 = ZSTD_hashPtr(ip1, hlog, mls);

    idx = hashTable[hash0];

    do {
        /* load repcode match for ip[2]*/
        const U32 rval = MEM_read32(ip2 - rep_offset1);

        /* write back hash table entry */
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;

        /* check repcode at ip[2] */
        if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
            ip0 = ip2;
            match0 = ip0 - rep_offset1;
            mLength = ip0[-1] == match0[-1];
            ip0 -= mLength;
            match0 -= mLength;
            offcode = STORE_REPCODE_1;
            mLength += 4;
            goto _match;
        }

        /* load match for ip[0] */
        if (idx >= prefixStartIndex) {
            mval = MEM_read32(base + idx);
        } else {
            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
        }

        /* check match at ip[0] */
        if (MEM_read32(ip0) == mval) {
            /* found a match! */
            goto _offset;
        }

        /* lookup ip[1] */
        idx = hashTable[hash1];

        /* hash ip[2] */
        hash0 = hash1;
        hash1 = ZSTD_hashPtr(ip2, hlog, mls);

        /* advance to next positions */
        ip0 = ip1;
        ip1 = ip2;
        ip2 = ip3;

        /* write back hash table entry */
        current0 = (U32)(ip0 - base);
        hashTable[hash0] = current0;

        /* load match for ip[0] */
        if (idx >= prefixStartIndex) {
            mval = MEM_read32(base + idx);
        } else {
            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
        }

        /* check match at ip[0] */
        if (MEM_read32(ip0) == mval) {
            /* found a match! */
            goto _offset;
        }

        /* lookup ip[1] */
        idx = hashTable[hash1];

        /* hash ip[2] */
        hash0 = hash1;
        hash1 = ZSTD_hashPtr(ip2, hlog, mls);

        /* advance to next positions */
        ip0 = ip1;
        ip1 = ip2;
        ip2 = ip0 + step;
        ip3 = ip1 + step;

        /* calculate step */
        if (ip2 >= nextStep) {
            step++;
            PREFETCH_L1(ip1 + 64);
            PREFETCH_L1(ip1 + 128);
            nextStep += kStepIncr;
        }
    } while (ip3 < ilimit);

_cleanup:
    /* Note that there are probably still a couple positions we could search.
     * However, it seems to be a meaningful performance hit to try to search
     * them. So let's not. */

    /* save reps for next block */
    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;

    /* Return the last literals size */
    return (size_t)(iend - anchor);

_offset: /* Requires: ip0, idx */

    /* Compute the offset code. */
    match0 = base + idx;
    rep_offset2 = rep_offset1;
    rep_offset1 = (U32)(ip0-match0);
    offcode = STORE_OFFSET(rep_offset1);
    mLength = 4;

    /* Count the backwards match length. */
    while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
        ip0--;
        match0--;
        mLength++;
    }

_match: /* Requires: ip0, match0, offcode */

    /* Count the forward length. */
    mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);

    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);

    ip0 += mLength;
    anchor = ip0;

    /* write next hash table entry */
    if (ip1 < ip0) {
        hashTable[hash1] = (U32)(ip1 - base);
    }

    /* Fill table and check for immediate repcode. */
    if (ip0 <= ilimit) {
        /* Fill Table */
        assert(base+current0+2 > istart);  /* check base overflow */
        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

        if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
            while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
                { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                ip0 += rLength;
                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
                anchor = ip0;
                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
    }   }   }

    goto _start;
}

#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                   \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                     \
        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
    }

ZSTD_GEN_FAST_FN(noDict, 4, 1)
ZSTD_GEN_FAST_FN(noDict, 5, 1)
ZSTD_GEN_FAST_FN(noDict, 6, 1)
ZSTD_GEN_FAST_FN(noDict, 7, 1)

ZSTD_GEN_FAST_FN(noDict, 4, 0)
ZSTD_GEN_FAST_FN(noDict, 5, 0)
ZSTD_GEN_FAST_FN(noDict, 6, 0)
ZSTD_GEN_FAST_FN(noDict, 7, 0)

size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState == NULL);
    if (ms->cParams.targetLength > 1) {
        switch(mls)
        {
        default: /* includes case 3 */
        case 4 :
            return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
        case 5 :
            return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
        case 6 :
            return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
        case 7 :
            return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
        }
    } else {
        switch(mls)
        {
        default: /* includes case 3 */
        case 4 :
            return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
        case 5 :
            return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
        case 6 :
            return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
        case 7 :
            return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
        }

    }
}

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
    const U32* const dictHashTable = dms->hashTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
    const U32 dictHLog             = dictCParams->hashLog;

    /* if a dictionary is still attached, it necessarily means that
     * it is within window size. So we just check it. */
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
    assert(endIndex - prefixStartIndex <= maxDistance);
    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */

    (void)hasStep; /* not currently specialized on whether it's accelerated */

    /* ensure there will be no underflow
     * when translating a dict index into a local index */
    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
    ip += (dictAndPrefixLength == 0);
    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndex = hashTable[h];
        const BYTE* match = base + matchIndex;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashTable[h] = curr;   /* update hash table */

        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
        } else if ( (matchIndex <= prefixStartIndex) ) {
            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
            U32 const dictMatchIndex = dictHashTable[dictHash];
            const BYTE* dictMatch = dictBase + dictMatchIndex;
            if (dictMatchIndex <= dictStartIndex ||
                MEM_read32(dictMatch) != MEM_read32(ip)) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            } else {
                /* found a dict match */
                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
                while (((ip>anchor) & (dictMatch>dictStart))
                     && (ip[-1] == dictMatch[-1])) {
                    ip--; dictMatch--; mLength++;
                } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
            assert(stepSize >= 1);
            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
            continue;
        } else {
            /* found a regular match */
            U32 const offset = (U32)(ip-match);
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            while (((ip>anchor) & (match>prefixStart))
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            assert(base+curr+2 > istart);  /* check base overflow */
            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                        dictBase - dictIndexDelta + repIndex2 :
                        base + repIndex2;
                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}


ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)

size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
    }
}


static size_t ZSTD_compressBlock_fast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];

    (void)hasStep; /* not currently specialized on whether it's accelerated */

    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);

    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
        const U32    matchIndex = hashTable[h];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE*  match = matchBase + matchIndex;
        const U32    curr = (U32)(ip-base);
        const U32    repIndex = curr + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        hashTable[h] = curr;   /* update hash table */
        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);

        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
            ip += rLength;
            anchor = ip;
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 const offset = curr - matchIndex;
                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
                ip += mLength;
                anchor = ip;
        }   }

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_FAST_FN(extDict, 4, 0)
ZSTD_GEN_FAST_FN(extDict, 5, 0)
ZSTD_GEN_FAST_FN(extDict, 6, 0)
ZSTD_GEN_FAST_FN(extDict, 7, 0)

size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
    }
}