Lines Matching +full:4 +full:- +full:byte
1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
6 * This source code is licensed under both the BSD-style license (found in the
9 * You may select, at your option, one of the above-listed licenses.
24 /*-*************************************
31 const BYTE* ip, const BYTE* iend, in ZSTD_updateDUBT()
34 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_updateDUBT()
35 U32* const hashTable = ms->hashTable; in ZSTD_updateDUBT()
36 U32 const hashLog = cParams->hashLog; in ZSTD_updateDUBT()
38 U32* const bt = ms->chainTable; in ZSTD_updateDUBT()
39 U32 const btLog = cParams->chainLog - 1; in ZSTD_updateDUBT()
40 U32 const btMask = (1 << btLog) - 1; in ZSTD_updateDUBT()
42 const BYTE* const base = ms->window.base; in ZSTD_updateDUBT()
43 U32 const target = (U32)(ip - base); in ZSTD_updateDUBT()
44 U32 idx = ms->nextToUpdate; in ZSTD_updateDUBT()
48 idx, target, ms->window.dictLimit); in ZSTD_updateDUBT()
52 assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ in ZSTD_updateDUBT()
65 ms->nextToUpdate = target; in ZSTD_updateDUBT()
71 * assumption : curr >= btlow == (curr - btmask)
76 U32 curr, const BYTE* inputEnd, in ZSTD_insertDUBT1()
80 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_insertDUBT1()
81 U32* const bt = ms->chainTable; in ZSTD_insertDUBT1()
82 U32 const btLog = cParams->chainLog - 1; in ZSTD_insertDUBT1()
83 U32 const btMask = (1 << btLog) - 1; in ZSTD_insertDUBT1()
85 const BYTE* const base = ms->window.base; in ZSTD_insertDUBT1()
86 const BYTE* const dictBase = ms->window.dictBase; in ZSTD_insertDUBT1()
87 const U32 dictLimit = ms->window.dictLimit; in ZSTD_insertDUBT1()
88 const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; in ZSTD_insertDUBT1()
89 const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; in ZSTD_insertDUBT1()
90 const BYTE* const dictEnd = dictBase + dictLimit; in ZSTD_insertDUBT1()
91 const BYTE* const prefixStart = base + dictLimit; in ZSTD_insertDUBT1()
92 const BYTE* match; in ZSTD_insertDUBT1()
97 U32 const windowValid = ms->window.lowLimit; in ZSTD_insertDUBT1()
98 U32 const maxDistance = 1U << cParams->windowLog; in ZSTD_insertDUBT1()
99 U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; in ZSTD_insertDUBT1()
107 for (; nbCompares && (matchIndex > windowLow); --nbCompares) { in ZSTD_insertDUBT1()
118 const BYTE* const mBase = ( (dictMode != ZSTD_extDict) in ZSTD_insertDUBT1()
167 const BYTE* const ip, const BYTE* const iend, in ZSTD_DUBT_findBetterDictMatch()
174 const ZSTD_MatchState_t * const dms = ms->dictMatchState; in ZSTD_DUBT_findBetterDictMatch()
175 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; in ZSTD_DUBT_findBetterDictMatch()
176 const U32 * const dictHashTable = dms->hashTable; in ZSTD_DUBT_findBetterDictMatch()
177 U32 const hashLog = dmsCParams->hashLog; in ZSTD_DUBT_findBetterDictMatch()
181 const BYTE* const base = ms->window.base; in ZSTD_DUBT_findBetterDictMatch()
182 const BYTE* const prefixStart = base + ms->window.dictLimit; in ZSTD_DUBT_findBetterDictMatch()
183 U32 const curr = (U32)(ip-base); in ZSTD_DUBT_findBetterDictMatch()
184 const BYTE* const dictBase = dms->window.base; in ZSTD_DUBT_findBetterDictMatch()
185 const BYTE* const dictEnd = dms->window.nextSrc; in ZSTD_DUBT_findBetterDictMatch()
186 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); in ZSTD_DUBT_findBetterDictMatch()
187 U32 const dictLowLimit = dms->window.lowLimit; in ZSTD_DUBT_findBetterDictMatch()
188 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; in ZSTD_DUBT_findBetterDictMatch()
190 U32* const dictBt = dms->chainTable; in ZSTD_DUBT_findBetterDictMatch()
191 U32 const btLog = dmsCParams->chainLog - 1; in ZSTD_DUBT_findBetterDictMatch()
192 U32 const btMask = (1 << btLog) - 1; in ZSTD_DUBT_findBetterDictMatch()
193 …U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit … in ZSTD_DUBT_findBetterDictMatch()
200 for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) { in ZSTD_DUBT_findBetterDictMatch()
203 const BYTE* match = dictBase + dictMatchIndex; in ZSTD_DUBT_findBetterDictMatch()
210 …if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32(… in ZSTD_DUBT_findBetterDictMatch()
211 …STD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dic… in ZSTD_DUBT_findBetterDictMatch()
212 … (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMat… in ZSTD_DUBT_findBetterDictMatch()
213 bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); in ZSTD_DUBT_findBetterDictMatch()
233 U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; in ZSTD_DUBT_findBetterDictMatch()
245 const BYTE* const ip, const BYTE* const iend, in ZSTD_DUBT_findBestMatch()
250 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_DUBT_findBestMatch()
251 U32* const hashTable = ms->hashTable; in ZSTD_DUBT_findBestMatch()
252 U32 const hashLog = cParams->hashLog; in ZSTD_DUBT_findBestMatch()
256 const BYTE* const base = ms->window.base; in ZSTD_DUBT_findBestMatch()
257 U32 const curr = (U32)(ip-base); in ZSTD_DUBT_findBestMatch()
258 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); in ZSTD_DUBT_findBestMatch()
260 U32* const bt = ms->chainTable; in ZSTD_DUBT_findBestMatch()
261 U32 const btLog = cParams->chainLog - 1; in ZSTD_DUBT_findBestMatch()
262 U32 const btMask = (1 << btLog) - 1; in ZSTD_DUBT_findBestMatch()
263 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; in ZSTD_DUBT_findBestMatch()
268 U32 nbCompares = 1U << cParams->searchLog; in ZSTD_DUBT_findBestMatch()
273 assert(ip <= iend-8); /* required for h calculation */ in ZSTD_DUBT_findBestMatch()
287 nbCandidates --; in ZSTD_DUBT_findBestMatch()
312 const BYTE* const dictBase = ms->window.dictBase; in ZSTD_DUBT_findBestMatch()
313 const U32 dictLimit = ms->window.dictLimit; in ZSTD_DUBT_findBestMatch()
314 const BYTE* const dictEnd = dictBase + dictLimit; in ZSTD_DUBT_findBestMatch()
315 const BYTE* const prefixStart = base + dictLimit; in ZSTD_DUBT_findBestMatch()
325 for (; nbCompares && (matchIndex > windowLow); --nbCompares) { in ZSTD_DUBT_findBestMatch()
328 const BYTE* match; in ZSTD_DUBT_findBestMatch()
341 if (matchLength > matchEndIdx - matchIndex) in ZSTD_DUBT_findBestMatch()
343 …if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbi… in ZSTD_DUBT_findBestMatch()
344 bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); in ZSTD_DUBT_findBestMatch()
382 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ in ZSTD_DUBT_findBestMatch()
384 U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; in ZSTD_DUBT_findBestMatch()
397 const BYTE* const ip, const BYTE* const iLimit, in ZSTD_BtFindBestMatch()
403 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ in ZSTD_BtFindBestMatch()
412 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip) in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
414 const BYTE* const base = ms->window.base; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
415 U32 const target = (U32)(ip - base); in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
416 U32* const hashTable = ms->hashTable; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
417 U32* const chainTable = ms->chainTable; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
418 U32 const chainSize = 1 << ms->cParams.chainLog; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
419 U32 idx = ms->nextToUpdate; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
420 U32 const minChain = chainSize < target - idx ? target - chainSize : idx; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
422 U32 const cacheSize = bucketSize - 1; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
423 U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
431 U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
434 U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
435 U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
438 assert(ms->cParams.chainLog <= 24); in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
439 assert(ms->cParams.hashLog > ms->cParams.chainLog); in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
445 U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
447 tmpChainTable[idx - tmpMinChain] = hashTable[h]; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
465 i = tmpChainTable[i - tmpMinChain]; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
487 i = tmpChainTable[i - tmpMinChain]; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
493 tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
503 U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
509 hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
513 for (idx = ms->nextToUpdate; idx < target; idx++) { in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
514 U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
518 for (i = cacheSize - 1; i; i--) in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
519 hashTable[h + i] = hashTable[h + i - 1]; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
523 ms->nextToUpdate = target; in ZSTD_dedicatedDictSearch_lazy_loadDictionary()
532 const BYTE* const ip, const BYTE* const iLimit, in ZSTD_dedicatedDictSearch_lazy_search()
533 const BYTE* const prefixStart, const U32 curr, in ZSTD_dedicatedDictSearch_lazy_search()
535 const U32 ddsLowestIndex = dms->window.dictLimit; in ZSTD_dedicatedDictSearch_lazy_search()
536 const BYTE* const ddsBase = dms->window.base; in ZSTD_dedicatedDictSearch_lazy_search()
537 const BYTE* const ddsEnd = dms->window.nextSrc; in ZSTD_dedicatedDictSearch_lazy_search()
538 const U32 ddsSize = (U32)(ddsEnd - ddsBase); in ZSTD_dedicatedDictSearch_lazy_search()
539 const U32 ddsIndexDelta = dictLimit - ddsSize; in ZSTD_dedicatedDictSearch_lazy_search()
541 const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; in ZSTD_dedicatedDictSearch_lazy_search()
545 for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { in ZSTD_dedicatedDictSearch_lazy_search()
546 PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); in ZSTD_dedicatedDictSearch_lazy_search()
550 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; in ZSTD_dedicatedDictSearch_lazy_search()
553 PREFETCH_L1(&dms->chainTable[chainIndex]); in ZSTD_dedicatedDictSearch_lazy_search()
558 const BYTE* match; in ZSTD_dedicatedDictSearch_lazy_search()
559 matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; in ZSTD_dedicatedDictSearch_lazy_search()
569 assert(match+4 <= ddsEnd); in ZSTD_dedicatedDictSearch_lazy_search()
571 /* assumption : matchIndex <= dictLimit-4 (by table construction) */ in ZSTD_dedicatedDictSearch_lazy_search()
572 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; in ZSTD_dedicatedDictSearch_lazy_search()
578 *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); in ZSTD_dedicatedDictSearch_lazy_search()
587 U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; in ZSTD_dedicatedDictSearch_lazy_search()
590 U32 const chainAttempts = nbAttempts - ddsAttempt; in ZSTD_dedicatedDictSearch_lazy_search()
595 PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); in ZSTD_dedicatedDictSearch_lazy_search()
600 const BYTE* match; in ZSTD_dedicatedDictSearch_lazy_search()
601 matchIndex = dms->chainTable[chainIndex]; in ZSTD_dedicatedDictSearch_lazy_search()
606 assert(match+4 <= ddsEnd); in ZSTD_dedicatedDictSearch_lazy_search()
608 /* assumption : matchIndex <= dictLimit-4 (by table construction) */ in ZSTD_dedicatedDictSearch_lazy_search()
609 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; in ZSTD_dedicatedDictSearch_lazy_search()
615 *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); in ZSTD_dedicatedDictSearch_lazy_search()
636 const BYTE* ip, U32 const mls, U32 const lazySkipping) in ZSTD_insertAndFindFirstIndex_internal()
638 U32* const hashTable = ms->hashTable; in ZSTD_insertAndFindFirstIndex_internal()
639 const U32 hashLog = cParams->hashLog; in ZSTD_insertAndFindFirstIndex_internal()
640 U32* const chainTable = ms->chainTable; in ZSTD_insertAndFindFirstIndex_internal()
641 const U32 chainMask = (1 << cParams->chainLog) - 1; in ZSTD_insertAndFindFirstIndex_internal()
642 const BYTE* const base = ms->window.base; in ZSTD_insertAndFindFirstIndex_internal()
643 const U32 target = (U32)(ip - base); in ZSTD_insertAndFindFirstIndex_internal()
644 U32 idx = ms->nextToUpdate; in ZSTD_insertAndFindFirstIndex_internal()
656 ms->nextToUpdate = target; in ZSTD_insertAndFindFirstIndex_internal()
660 U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) { in ZSTD_insertAndFindFirstIndex()
661 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_insertAndFindFirstIndex()
662 …return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkippin… in ZSTD_insertAndFindFirstIndex()
670 const BYTE* const ip, const BYTE* const iLimit, in ZSTD_HcFindBestMatch()
674 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_HcFindBestMatch()
675 U32* const chainTable = ms->chainTable; in ZSTD_HcFindBestMatch()
676 const U32 chainSize = (1 << cParams->chainLog); in ZSTD_HcFindBestMatch()
677 const U32 chainMask = chainSize-1; in ZSTD_HcFindBestMatch()
678 const BYTE* const base = ms->window.base; in ZSTD_HcFindBestMatch()
679 const BYTE* const dictBase = ms->window.dictBase; in ZSTD_HcFindBestMatch()
680 const U32 dictLimit = ms->window.dictLimit; in ZSTD_HcFindBestMatch()
681 const BYTE* const prefixStart = base + dictLimit; in ZSTD_HcFindBestMatch()
682 const BYTE* const dictEnd = dictBase + dictLimit; in ZSTD_HcFindBestMatch()
683 const U32 curr = (U32)(ip-base); in ZSTD_HcFindBestMatch()
684 const U32 maxDistance = 1U << cParams->windowLog; in ZSTD_HcFindBestMatch()
685 const U32 lowestValid = ms->window.lowLimit; in ZSTD_HcFindBestMatch()
686 …const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestVali… in ZSTD_HcFindBestMatch()
687 const U32 isDictionary = (ms->loadedDictEnd != 0); in ZSTD_HcFindBestMatch()
689 const U32 minChain = curr > chainSize ? curr - chainSize : 0; in ZSTD_HcFindBestMatch()
690 U32 nbAttempts = 1U << cParams->searchLog; in ZSTD_HcFindBestMatch()
691 size_t ml=4-1; in ZSTD_HcFindBestMatch()
693 const ZSTD_MatchState_t* const dms = ms->dictMatchState; in ZSTD_HcFindBestMatch()
695 ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; in ZSTD_HcFindBestMatch()
702 const U32* entry = &dms->hashTable[ddsIdx]; in ZSTD_HcFindBestMatch()
707 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping); in ZSTD_HcFindBestMatch()
709 for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { in ZSTD_HcFindBestMatch()
712 const BYTE* const match = base + matchIndex; in ZSTD_HcFindBestMatch()
714 /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ in ZSTD_HcFindBestMatch()
715 if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ in ZSTD_HcFindBestMatch()
718 const BYTE* const match = dictBase + matchIndex; in ZSTD_HcFindBestMatch()
719 assert(match+4 <= dictEnd); in ZSTD_HcFindBestMatch()
720 …if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table con… in ZSTD_HcFindBestMatch()
721 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; in ZSTD_HcFindBestMatch()
727 *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); in ZSTD_HcFindBestMatch()
740 const U32* const dmsChainTable = dms->chainTable; in ZSTD_HcFindBestMatch()
741 const U32 dmsChainSize = (1 << dms->cParams.chainLog); in ZSTD_HcFindBestMatch()
742 const U32 dmsChainMask = dmsChainSize - 1; in ZSTD_HcFindBestMatch()
743 const U32 dmsLowestIndex = dms->window.dictLimit; in ZSTD_HcFindBestMatch()
744 const BYTE* const dmsBase = dms->window.base; in ZSTD_HcFindBestMatch()
745 const BYTE* const dmsEnd = dms->window.nextSrc; in ZSTD_HcFindBestMatch()
746 const U32 dmsSize = (U32)(dmsEnd - dmsBase); in ZSTD_HcFindBestMatch()
747 const U32 dmsIndexDelta = dictLimit - dmsSize; in ZSTD_HcFindBestMatch()
748 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; in ZSTD_HcFindBestMatch()
750 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; in ZSTD_HcFindBestMatch()
752 for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { in ZSTD_HcFindBestMatch()
754 const BYTE* const match = dmsBase + matchIndex; in ZSTD_HcFindBestMatch()
755 assert(match+4 <= dmsEnd); in ZSTD_HcFindBestMatch()
756 …if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table con… in ZSTD_HcFindBestMatch()
757 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; in ZSTD_HcFindBestMatch()
763 *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); in ZSTD_HcFindBestMatch()
777 * (SIMD) Row-based matchfinder
779 /* Constants for row-based hash */
780 #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
783 #define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
788 * Starting from the LSB, returns the idx of the next non-zero bit.
799 FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { in ZSTD_row_nextIndex()
800 U32 next = (*tagRow-1) & rowMask; in ZSTD_row_nextIndex()
802 *tagRow = (BYTE)next; in ZSTD_row_nextIndex()
810 assert((align & (align - 1)) == 0); in ZSTD_isAligned()
811 return (((size_t)ptr) & (align - 1)) == 0; in ZSTD_isAligned()
817 FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const … in ZSTD_row_prefetch()
821 … /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */ in ZSTD_row_prefetch()
827 assert(rowLog == 4 || rowLog == 5 || rowLog == 6); in ZSTD_row_prefetch()
828 …sAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ in ZSTD_row_prefetch()
838 void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base, in ZSTD_row_fillHashCache()
840 U32 idx, const BYTE* const iLimit) in ZSTD_row_fillHashCache()
842 U32 const* const hashTable = ms->hashTable; in ZSTD_row_fillHashCache()
843 BYTE const* const tagTable = ms->tagTable; in ZSTD_row_fillHashCache()
844 U32 const hashLog = ms->rowHashLog; in ZSTD_row_fillHashCache()
845 U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); in ZSTD_row_fillHashCache()
849 …st hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); in ZSTD_row_fillHashCache()
852 ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; in ZSTD_row_fillHashCache()
855 …DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache… in ZSTD_row_fillHashCache()
856 … ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], in ZSTD_row_fillHashCache()
857 … ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); in ZSTD_row_fillHashCache()
861 * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
867 BYTE const* tagTable, BYTE const* base, in ZSTD_row_nextCachedHash()
891 U32* const hashTable = ms->hashTable; in ZSTD_row_update_internalImpl()
892 BYTE* const tagTable = ms->tagTable; in ZSTD_row_update_internalImpl()
893 U32 const hashLog = ms->rowHashLog; in ZSTD_row_update_internalImpl()
894 const BYTE* const base = ms->window.base; in ZSTD_row_update_internalImpl()
898 …seCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLo… in ZSTD_row_update_internalImpl()
899 …U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); in ZSTD_row_update_internalImpl()
902 BYTE* tagRow = tagTable + relRow; in ZSTD_row_update_internalImpl()
905 …== ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt)); in ZSTD_row_update_internalImpl()
912 …* Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUp…
917 void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip, in ZSTD_row_update_internal()
921 U32 idx = ms->nextToUpdate; in ZSTD_row_update_internal()
922 const BYTE* const base = ms->window.base; in ZSTD_row_update_internal()
923 const U32 target = (U32)(ip - base); in ZSTD_row_update_internal()
934 if (UNLIKELY(target - idx > kSkipThreshold)) { in ZSTD_row_update_internal()
937 idx = target - kMaxMatchEndPositionsToUpdate; in ZSTD_row_update_internal()
943 ms->nextToUpdate = target; in ZSTD_row_update_internal()
950 void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) { in ZSTD_row_update()
951 const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); in ZSTD_row_update()
952 const U32 rowMask = (1u << rowLog) - 1; in ZSTD_row_update()
953 const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); in ZSTD_row_update()
975 return 4; in ZSTD_row_matchMaskGroupWidth()
989 ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head) in ZSTD_row_getSSEMask()
992 int matches[4] = {0}; in ZSTD_row_getSSEMask()
994 assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4); in ZSTD_row_getSSEMask()
1002 assert(nbChunks == 4); in ZSTD_row_getSSEMask()
1009 ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGro… in ZSTD_row_getNEONMask()
1013 /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits. in ZSTD_row_getNEONMask()
1014 * After that groups of 4 bits represent the equalMask. We lower in ZSTD_row_getNEONMask()
1020 const uint8x8_t res = vshrn_n_u16(equalMask, 4); in ZSTD_row_getNEONMask()
1033 const uint8x8_t res = vsli_n_u8(t0, t1, 4); in ZSTD_row_getNEONMask()
1047 const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); in ZSTD_row_getNEONMask()
1048 const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); in ZSTD_row_getNEONMask()
1056 * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
1062 ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 ro… in ZSTD_row_getMatchMask()
1064 const BYTE* const src = tagRow; in ZSTD_row_getMatchMask()
1073 #else /* SW or NEON-LE */ in ZSTD_row_getMatchMask()
1076 /* This NEON path only works for little endian - otherwise use SWAR below */ in ZSTD_row_getMatchMask()
1083 const size_t shiftAmount = ((chunkSize * 8) - chunkSize); in ZSTD_row_getMatchMask()
1089 int i = rowEntries - chunkSize; in ZSTD_row_getMatchMask()
1090 assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8)); in ZSTD_row_getMatchMask()
1096 chunk = (((chunk | x80) - x01) | chunk) & x80; in ZSTD_row_getMatchMask()
1099 i -= chunkSize; in ZSTD_row_getMatchMask()
1107 chunk = (((chunk | x80) - x01) | chunk) & x80; in ZSTD_row_getMatchMask()
1110 i -= chunkSize; in ZSTD_row_getMatchMask()
1125 /* The high-level approach of the SIMD row based match finder is as follows:
1126 * - Figure out where to insert the new entry:
1127 …* - Generate a hash for current input position and split it into a one byte of tag and `rowHa…
1128 …* - The hash is salted by a value that changes on every context reset, so when the same …
1130 …* - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and …
1132 …* - Determine the correct position within the row to insert the entry into. Each row of 15 or…
1135 …* - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the …
1137 * - Pick the longest match.
1138 * - Insert the tag into the equivalent row and position in the tagTable.
1144 const BYTE* const ip, const BYTE* const iLimit, in ZSTD_RowFindBestMatch()
1149 U32* const hashTable = ms->hashTable; in ZSTD_RowFindBestMatch()
1150 BYTE* const tagTable = ms->tagTable; in ZSTD_RowFindBestMatch()
1151 U32* const hashCache = ms->hashCache; in ZSTD_RowFindBestMatch()
1152 const U32 hashLog = ms->rowHashLog; in ZSTD_RowFindBestMatch()
1153 const ZSTD_compressionParameters* const cParams = &ms->cParams; in ZSTD_RowFindBestMatch()
1154 const BYTE* const base = ms->window.base; in ZSTD_RowFindBestMatch()
1155 const BYTE* const dictBase = ms->window.dictBase; in ZSTD_RowFindBestMatch()
1156 const U32 dictLimit = ms->window.dictLimit; in ZSTD_RowFindBestMatch()
1157 const BYTE* const prefixStart = base + dictLimit; in ZSTD_RowFindBestMatch()
1158 const BYTE* const dictEnd = dictBase + dictLimit; in ZSTD_RowFindBestMatch()
1159 const U32 curr = (U32)(ip-base); in ZSTD_RowFindBestMatch()
1160 const U32 maxDistance = 1U << cParams->windowLog; in ZSTD_RowFindBestMatch()
1161 const U32 lowestValid = ms->window.lowLimit; in ZSTD_RowFindBestMatch()
1162 …const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestVali… in ZSTD_RowFindBestMatch()
1163 const U32 isDictionary = (ms->loadedDictEnd != 0); in ZSTD_RowFindBestMatch()
1166 const U32 rowMask = rowEntries - 1; in ZSTD_RowFindBestMatch()
1167 …const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb ent… in ZSTD_RowFindBestMatch()
1169 const U64 hashSalt = ms->hashSalt; in ZSTD_RowFindBestMatch()
1171 size_t ml=4-1; in ZSTD_RowFindBestMatch()
1175 const ZSTD_MatchState_t* const dms = ms->dictMatchState; in ZSTD_RowFindBestMatch()
1182 BYTE* dmsTagRow = NULL; in ZSTD_RowFindBestMatch()
1185 const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; in ZSTD_RowFindBestMatch()
1188 PREFETCH_L1(&dms->hashTable[ddsIdx]); in ZSTD_RowFindBestMatch()
1190 ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0; in ZSTD_RowFindBestMatch()
1195 U32* const dmsHashTable = dms->hashTable; in ZSTD_RowFindBestMatch()
1196 BYTE* const dmsTagTable = dms->tagTable; in ZSTD_RowFindBestMatch()
1197 U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); in ZSTD_RowFindBestMatch()
1200 dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); in ZSTD_RowFindBestMatch()
1206 if (!ms->lazySkipping) { in ZSTD_RowFindBestMatch()
1214 ms->nextToUpdate = curr; in ZSTD_RowFindBestMatch()
1216 ms->hashSaltEntropy += hash; /* collect salt entropy */ in ZSTD_RowFindBestMatch()
1222 BYTE* tagRow = (BYTE*)(tagTable + relRow); in ZSTD_RowFindBestMatch()
1227 ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); in ZSTD_RowFindBestMatch()
1230 for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { in ZSTD_RowFindBestMatch()
1243 --nbAttempts; in ZSTD_RowFindBestMatch()
1246 …/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the… in ZSTD_RowFindBestMatch()
1250 tagRow[pos] = (BYTE)tag; in ZSTD_RowFindBestMatch()
1251 row[pos] = ms->nextToUpdate++; in ZSTD_RowFindBestMatch()
1262 const BYTE* const match = base + matchIndex; in ZSTD_RowFindBestMatch()
1264 /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ in ZSTD_RowFindBestMatch()
1265 … if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ in ZSTD_RowFindBestMatch()
1268 const BYTE* const match = dictBase + matchIndex; in ZSTD_RowFindBestMatch()
1269 assert(match+4 <= dictEnd); in ZSTD_RowFindBestMatch()
1270 …_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table constructi… in ZSTD_RowFindBestMatch()
1271 … currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; in ZSTD_RowFindBestMatch()
1277 *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); in ZSTD_RowFindBestMatch()
1289 const U32 dmsLowestIndex = dms->window.dictLimit; in ZSTD_RowFindBestMatch()
1290 const BYTE* const dmsBase = dms->window.base; in ZSTD_RowFindBestMatch()
1291 const BYTE* const dmsEnd = dms->window.nextSrc; in ZSTD_RowFindBestMatch()
1292 const U32 dmsSize = (U32)(dmsEnd - dmsBase); in ZSTD_RowFindBestMatch()
1293 const U32 dmsIndexDelta = dictLimit - dmsSize; in ZSTD_RowFindBestMatch()
1299 … ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries); in ZSTD_RowFindBestMatch()
1301 for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { in ZSTD_RowFindBestMatch()
1309 --nbAttempts; in ZSTD_RowFindBestMatch()
1319 { const BYTE* const match = dmsBase + matchIndex; in ZSTD_RowFindBestMatch()
1320 assert(match+4 <= dmsEnd); in ZSTD_RowFindBestMatch()
1322 … currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; in ZSTD_RowFindBestMatch()
1328 *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); in ZSTD_RowFindBestMatch()
1349 * TODO: Move the match re-winding into searchMax. This improves compression
1352 * TODO: Try moving the repcode search into searchMax. After the re-winding
1369 const BYTE* ip, const BYTE* const iLimit, \
1372 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1379 const BYTE* ip, const BYTE* const iLimit, \
1382 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1389 const BYTE* ip, const BYTE* const iLimit, \
1392 assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1393 assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
1398 X(dictMode, mls, 4) \
1403 ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \
1408 X(dictMode, 4) \
1480 * @param mls The minimum search length, in the range [4, 6].
1481 * @param rowLog The row log (if applicable), in the range [4, 6].
1490 const BYTE* ip, in ZSTD_searchMax()
1491 const BYTE* iend, in ZSTD_searchMax()
1512 * Common parser - lazy strategy
1524 const BYTE* const istart = (const BYTE*)src; in ZSTD_compressBlock_lazy_generic()
1525 const BYTE* ip = istart; in ZSTD_compressBlock_lazy_generic()
1526 const BYTE* anchor = istart; in ZSTD_compressBlock_lazy_generic()
1527 const BYTE* const iend = istart + srcSize; in ZSTD_compressBlock_lazy_generic()
1528 …const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE … in ZSTD_compressBlock_lazy_generic()
1529 const BYTE* const base = ms->window.base; in ZSTD_compressBlock_lazy_generic()
1530 const U32 prefixLowestIndex = ms->window.dictLimit; in ZSTD_compressBlock_lazy_generic()
1531 const BYTE* const prefixLowest = base + prefixLowestIndex; in ZSTD_compressBlock_lazy_generic()
1532 const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); in ZSTD_compressBlock_lazy_generic()
1533 const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); in ZSTD_compressBlock_lazy_generic()
1541 const ZSTD_MatchState_t* const dms = ms->dictMatchState; in ZSTD_compressBlock_lazy_generic()
1542 const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; in ZSTD_compressBlock_lazy_generic()
1543 const BYTE* const dictBase = isDxS ? dms->window.base : NULL; in ZSTD_compressBlock_lazy_generic()
1544 const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; in ZSTD_compressBlock_lazy_generic()
1545 const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; in ZSTD_compressBlock_lazy_generic()
1547 prefixLowestIndex - (U32)(dictEnd - dictBase) : in ZSTD_compressBlock_lazy_generic()
1549 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); in ZSTD_compressBlock_lazy_generic()
1554 U32 const curr = (U32)(ip - base); in ZSTD_compressBlock_lazy_generic()
1555 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); in ZSTD_compressBlock_lazy_generic()
1556 U32 const maxRep = curr - windowLow; in ZSTD_compressBlock_lazy_generic()
1568 ms->lazySkipping = 0; in ZSTD_compressBlock_lazy_generic()
1571 ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); in ZSTD_compressBlock_lazy_generic()
1577 * code alignment is perturbed. To fix the instability align the loop on 32-bytes. in ZSTD_compressBlock_lazy_generic()
1584 const BYTE* start=ip+1; in ZSTD_compressBlock_lazy_generic()
1589 const U32 repIndex = (U32)(ip - base) + 1 - offset_1; in ZSTD_compressBlock_lazy_generic()
1590 … const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) in ZSTD_compressBlock_lazy_generic()
1592 dictBase + (repIndex - dictIndexDelta) : in ZSTD_compressBlock_lazy_generic()
1596 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; in ZSTD_compressBlock_lazy_generic()
1597 … matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; in ZSTD_compressBlock_lazy_generic()
1602 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { in ZSTD_compressBlock_lazy_generic()
1603 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; in ZSTD_compressBlock_lazy_generic()
1614 if (matchLength < 4) { in ZSTD_compressBlock_lazy_generic()
1615 …size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompress… in ZSTD_compressBlock_lazy_generic()
1624 ms->lazySkipping = step > kLazySkippingStep; in ZSTD_compressBlock_lazy_generic()
1634 && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { in ZSTD_compressBlock_lazy_generic()
1635 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; in ZSTD_compressBlock_lazy_generic()
1637 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_generic()
1638 if ((mlRep >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_generic()
1642 const U32 repIndex = (U32)(ip - base) - offset_1; in ZSTD_compressBlock_lazy_generic()
1643 const BYTE* repMatch = repIndex < prefixLowestIndex ? in ZSTD_compressBlock_lazy_generic()
1644 dictBase + (repIndex - dictIndexDelta) : in ZSTD_compressBlock_lazy_generic()
1648 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; in ZSTD_compressBlock_lazy_generic()
1649 … size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; in ZSTD_compressBlock_lazy_generic()
1651 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_generic()
1652 if ((mlRep >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_generic()
1658 … int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ in ZSTD_compressBlock_lazy_generic()
1659 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); in ZSTD_compressBlock_lazy_generic()
1660 if ((ml2 >= 4) && (gain2 > gain1)) { in ZSTD_compressBlock_lazy_generic()
1670 && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { in ZSTD_compressBlock_lazy_generic()
1671 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; in ZSTD_compressBlock_lazy_generic()
1672 int const gain2 = (int)(mlRep * 4); in ZSTD_compressBlock_lazy_generic()
1673 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_generic()
1674 if ((mlRep >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_generic()
1678 const U32 repIndex = (U32)(ip - base) - offset_1; in ZSTD_compressBlock_lazy_generic()
1679 const BYTE* repMatch = repIndex < prefixLowestIndex ? in ZSTD_compressBlock_lazy_generic()
1680 dictBase + (repIndex - dictIndexDelta) : in ZSTD_compressBlock_lazy_generic()
1684 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; in ZSTD_compressBlock_lazy_generic()
1685 … size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; in ZSTD_compressBlock_lazy_generic()
1686 int const gain2 = (int)(mlRep * 4); in ZSTD_compressBlock_lazy_generic()
1687 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_generic()
1688 if ((mlRep >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_generic()
1694 … int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ in ZSTD_compressBlock_lazy_generic()
1695 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); in ZSTD_compressBlock_lazy_generic()
1696 if ((ml2 >= 4) && (gain2 > gain1)) { in ZSTD_compressBlock_lazy_generic()
1704 * Pay attention that `start[-value]` can lead to strange undefined behavior in ZSTD_compressBlock_lazy_generic()
1705 * notably if `value` is unsigned, resulting in a large positive `-value`. in ZSTD_compressBlock_lazy_generic()
1710 while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest)) in ZSTD_compressBlock_lazy_generic()
1711 …&& (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefi… in ZSTD_compressBlock_lazy_generic()
1712 { start--; matchLength++; } in ZSTD_compressBlock_lazy_generic()
1715 U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); in ZSTD_compressBlock_lazy_generic()
1716 …const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : ba… in ZSTD_compressBlock_lazy_generic()
1717 … const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; in ZSTD_compressBlock_lazy_generic()
1718 …while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLeng… in ZSTD_compressBlock_lazy_generic()
1724 { size_t const litLength = (size_t)(start - anchor); in ZSTD_compressBlock_lazy_generic()
1728 if (ms->lazySkipping) { in ZSTD_compressBlock_lazy_generic()
1731 ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); in ZSTD_compressBlock_lazy_generic()
1733 ms->lazySkipping = 0; in ZSTD_compressBlock_lazy_generic()
1739 U32 const current2 = (U32)(ip-base); in ZSTD_compressBlock_lazy_generic()
1740 U32 const repIndex = current2 - offset_2; in ZSTD_compressBlock_lazy_generic()
1741 const BYTE* repMatch = repIndex < prefixLowestIndex ? in ZSTD_compressBlock_lazy_generic()
1742 dictBase - dictIndexDelta + repIndex : in ZSTD_compressBlock_lazy_generic()
1746 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; in ZSTD_compressBlock_lazy_generic()
1747 … matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; in ZSTD_compressBlock_lazy_generic()
1760 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { in ZSTD_compressBlock_lazy_generic()
1762 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; in ZSTD_compressBlock_lazy_generic()
1779 return (size_t)(iend - anchor); in ZSTD_compressBlock_lazy_generic()
1944 const BYTE* const istart = (const BYTE*)src; in ZSTD_compressBlock_lazy_extDict_generic()
1945 const BYTE* ip = istart; in ZSTD_compressBlock_lazy_extDict_generic()
1946 const BYTE* anchor = istart; in ZSTD_compressBlock_lazy_extDict_generic()
1947 const BYTE* const iend = istart + srcSize; in ZSTD_compressBlock_lazy_extDict_generic()
1948 …const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : … in ZSTD_compressBlock_lazy_extDict_generic()
1949 const BYTE* const base = ms->window.base; in ZSTD_compressBlock_lazy_extDict_generic()
1950 const U32 dictLimit = ms->window.dictLimit; in ZSTD_compressBlock_lazy_extDict_generic()
1951 const BYTE* const prefixStart = base + dictLimit; in ZSTD_compressBlock_lazy_extDict_generic()
1952 const BYTE* const dictBase = ms->window.dictBase; in ZSTD_compressBlock_lazy_extDict_generic()
1953 const BYTE* const dictEnd = dictBase + dictLimit; in ZSTD_compressBlock_lazy_extDict_generic()
1954 const BYTE* const dictStart = dictBase + ms->window.lowLimit; in ZSTD_compressBlock_lazy_extDict_generic()
1955 const U32 windowLog = ms->cParams.windowLog; in ZSTD_compressBlock_lazy_extDict_generic()
1956 const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); in ZSTD_compressBlock_lazy_extDict_generic()
1957 const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); in ZSTD_compressBlock_lazy_extDict_generic()
1964 ms->lazySkipping = 0; in ZSTD_compressBlock_lazy_extDict_generic()
1969 ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); in ZSTD_compressBlock_lazy_extDict_generic()
1975 * code alignment is perturbed. To fix the instability align the loop on 32-bytes. in ZSTD_compressBlock_lazy_extDict_generic()
1982 const BYTE* start=ip+1; in ZSTD_compressBlock_lazy_extDict_generic()
1983 U32 curr = (U32)(ip-base); in ZSTD_compressBlock_lazy_extDict_generic()
1987 const U32 repIndex = (U32)(curr+1 - offset_1); in ZSTD_compressBlock_lazy_extDict_generic()
1988 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; in ZSTD_compressBlock_lazy_extDict_generic()
1989 const BYTE* const repMatch = repBase + repIndex; in ZSTD_compressBlock_lazy_extDict_generic()
1991 & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ in ZSTD_compressBlock_lazy_extDict_generic()
1994 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; in ZSTD_compressBlock_lazy_extDict_generic()
1995 … matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; in ZSTD_compressBlock_lazy_extDict_generic()
2006 if (matchLength < 4) { in ZSTD_compressBlock_lazy_extDict_generic()
2007 size_t const step = ((size_t)(ip-anchor) >> kSearchStrength); in ZSTD_compressBlock_lazy_extDict_generic()
2016 ms->lazySkipping = step > kLazySkippingStep; in ZSTD_compressBlock_lazy_extDict_generic()
2028 const U32 repIndex = (U32)(curr - offset_1); in ZSTD_compressBlock_lazy_extDict_generic()
2029 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; in ZSTD_compressBlock_lazy_extDict_generic()
2030 const BYTE* const repMatch = repBase + repIndex; in ZSTD_compressBlock_lazy_extDict_generic()
2032 … & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ in ZSTD_compressBlock_lazy_extDict_generic()
2035 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; in ZSTD_compressBlock_lazy_extDict_generic()
2036 … size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; in ZSTD_compressBlock_lazy_extDict_generic()
2038 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_extDict_generic()
2039 if ((repLength >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_extDict_generic()
2046 … int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ in ZSTD_compressBlock_lazy_extDict_generic()
2047 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); in ZSTD_compressBlock_lazy_extDict_generic()
2048 if ((ml2 >= 4) && (gain2 > gain1)) { in ZSTD_compressBlock_lazy_extDict_generic()
2060 const U32 repIndex = (U32)(curr - offset_1); in ZSTD_compressBlock_lazy_extDict_generic()
2061 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; in ZSTD_compressBlock_lazy_extDict_generic()
2062 const BYTE* const repMatch = repBase + repIndex; in ZSTD_compressBlock_lazy_extDict_generic()
2064 … & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ in ZSTD_compressBlock_lazy_extDict_generic()
2067 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; in ZSTD_compressBlock_lazy_extDict_generic()
2068 … size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; in ZSTD_compressBlock_lazy_extDict_generic()
2069 int const gain2 = (int)(repLength * 4); in ZSTD_compressBlock_lazy_extDict_generic()
2070 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); in ZSTD_compressBlock_lazy_extDict_generic()
2071 if ((repLength >= 4) && (gain2 > gain1)) in ZSTD_compressBlock_lazy_extDict_generic()
2078 … int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ in ZSTD_compressBlock_lazy_extDict_generic()
2079 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); in ZSTD_compressBlock_lazy_extDict_generic()
2080 if ((ml2 >= 4) && (gain2 > gain1)) { in ZSTD_compressBlock_lazy_extDict_generic()
2089 U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); in ZSTD_compressBlock_lazy_extDict_generic()
2090 … const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; in ZSTD_compressBlock_lazy_extDict_generic()
2091 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; in ZSTD_compressBlock_lazy_extDict_generic()
2092 …while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLeng… in ZSTD_compressBlock_lazy_extDict_generic()
2098 { size_t const litLength = (size_t)(start - anchor); in ZSTD_compressBlock_lazy_extDict_generic()
2102 if (ms->lazySkipping) { in ZSTD_compressBlock_lazy_extDict_generic()
2105 ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); in ZSTD_compressBlock_lazy_extDict_generic()
2107 ms->lazySkipping = 0; in ZSTD_compressBlock_lazy_extDict_generic()
2112 const U32 repCurrent = (U32)(ip-base); in ZSTD_compressBlock_lazy_extDict_generic()
2114 const U32 repIndex = repCurrent - offset_2; in ZSTD_compressBlock_lazy_extDict_generic()
2115 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; in ZSTD_compressBlock_lazy_extDict_generic()
2116 const BYTE* const repMatch = repBase + repIndex; in ZSTD_compressBlock_lazy_extDict_generic()
2118 … & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ in ZSTD_compressBlock_lazy_extDict_generic()
2121 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; in ZSTD_compressBlock_lazy_extDict_generic()
2122 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; in ZSTD_compressBlock_lazy_extDict_generic()
2137 return (size_t)(iend - anchor); in ZSTD_compressBlock_lazy_extDict_generic()