xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1*61145dc2SMartin Matuska // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2c03c5b1cSMartin Matuska /*
3c03c5b1cSMartin Matuska  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
4c03c5b1cSMartin Matuska  * All rights reserved.
5c03c5b1cSMartin Matuska  *
6c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
7c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
9c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
10c03c5b1cSMartin Matuska  */
11c03c5b1cSMartin Matuska 
12c03c5b1cSMartin Matuska #include "zstd_compress_internal.h"
13c03c5b1cSMartin Matuska #include "zstd_lazy.h"
14c03c5b1cSMartin Matuska 
15c03c5b1cSMartin Matuska 
16c03c5b1cSMartin Matuska /*-*************************************
17c03c5b1cSMartin Matuska *  Binary Tree search
18c03c5b1cSMartin Matuska ***************************************/
19c03c5b1cSMartin Matuska 
20c03c5b1cSMartin Matuska static void
ZSTD_updateDUBT(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend,U32 mls)21c03c5b1cSMartin Matuska ZSTD_updateDUBT(ZSTD_matchState_t* ms,
22c03c5b1cSMartin Matuska                 const BYTE* ip, const BYTE* iend,
23c03c5b1cSMartin Matuska                 U32 mls)
24c03c5b1cSMartin Matuska {
25c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
26c03c5b1cSMartin Matuska     U32* const hashTable = ms->hashTable;
27c03c5b1cSMartin Matuska     U32  const hashLog = cParams->hashLog;
28c03c5b1cSMartin Matuska 
29c03c5b1cSMartin Matuska     U32* const bt = ms->chainTable;
30c03c5b1cSMartin Matuska     U32  const btLog  = cParams->chainLog - 1;
31c03c5b1cSMartin Matuska     U32  const btMask = (1 << btLog) - 1;
32c03c5b1cSMartin Matuska 
33c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
34c03c5b1cSMartin Matuska     U32 const target = (U32)(ip - base);
35c03c5b1cSMartin Matuska     U32 idx = ms->nextToUpdate;
36c03c5b1cSMartin Matuska 
37c03c5b1cSMartin Matuska     if (idx != target)
38c03c5b1cSMartin Matuska         DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
39c03c5b1cSMartin Matuska                     idx, target, ms->window.dictLimit);
40c03c5b1cSMartin Matuska     assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
41c03c5b1cSMartin Matuska     (void)iend;
42c03c5b1cSMartin Matuska 
43c03c5b1cSMartin Matuska     assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
44c03c5b1cSMartin Matuska     for ( ; idx < target ; idx++) {
45c03c5b1cSMartin Matuska         size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
46c03c5b1cSMartin Matuska         U32    const matchIndex = hashTable[h];
47c03c5b1cSMartin Matuska 
48c03c5b1cSMartin Matuska         U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
49c03c5b1cSMartin Matuska         U32*   const sortMarkPtr  = nextCandidatePtr + 1;
50c03c5b1cSMartin Matuska 
51c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
52c03c5b1cSMartin Matuska         hashTable[h] = idx;   /* Update Hash Table */
53c03c5b1cSMartin Matuska         *nextCandidatePtr = matchIndex;   /* update BT like a chain */
54c03c5b1cSMartin Matuska         *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
55c03c5b1cSMartin Matuska     }
56c03c5b1cSMartin Matuska     ms->nextToUpdate = target;
57c03c5b1cSMartin Matuska }
58c03c5b1cSMartin Matuska 
59c03c5b1cSMartin Matuska 
60c03c5b1cSMartin Matuska /** ZSTD_insertDUBT1() :
61c03c5b1cSMartin Matuska  *  sort one already inserted but unsorted position
62c03c5b1cSMartin Matuska  *  assumption : current >= btlow == (current - btmask)
63c03c5b1cSMartin Matuska  *  doesn't fail */
64c03c5b1cSMartin Matuska static void
ZSTD_insertDUBT1(ZSTD_matchState_t * ms,U32 current,const BYTE * inputEnd,U32 nbCompares,U32 btLow,const ZSTD_dictMode_e dictMode)65c03c5b1cSMartin Matuska ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
66c03c5b1cSMartin Matuska                  U32 current, const BYTE* inputEnd,
67c03c5b1cSMartin Matuska                  U32 nbCompares, U32 btLow,
68c03c5b1cSMartin Matuska                  const ZSTD_dictMode_e dictMode)
69c03c5b1cSMartin Matuska {
70c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
71c03c5b1cSMartin Matuska     U32* const bt = ms->chainTable;
72c03c5b1cSMartin Matuska     U32  const btLog  = cParams->chainLog - 1;
73c03c5b1cSMartin Matuska     U32  const btMask = (1 << btLog) - 1;
74c03c5b1cSMartin Matuska     size_t commonLengthSmaller=0, commonLengthLarger=0;
75c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
76c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
77c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
78c03c5b1cSMartin Matuska     const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
79c03c5b1cSMartin Matuska     const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
80c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dictBase + dictLimit;
81c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
82c03c5b1cSMartin Matuska     const BYTE* match;
83c03c5b1cSMartin Matuska     U32* smallerPtr = bt + 2*(current&btMask);
84c03c5b1cSMartin Matuska     U32* largerPtr  = smallerPtr + 1;
85c03c5b1cSMartin Matuska     U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
86c03c5b1cSMartin Matuska     U32 dummy32;   /* to be nullified at the end */
87c03c5b1cSMartin Matuska     U32 const windowValid = ms->window.lowLimit;
88c03c5b1cSMartin Matuska     U32 const maxDistance = 1U << cParams->windowLog;
89c03c5b1cSMartin Matuska     U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
90c03c5b1cSMartin Matuska 
91c03c5b1cSMartin Matuska 
92c03c5b1cSMartin Matuska     DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
93c03c5b1cSMartin Matuska                 current, dictLimit, windowLow);
94c03c5b1cSMartin Matuska     assert(current >= btLow);
95c03c5b1cSMartin Matuska     assert(ip < iend);   /* condition for ZSTD_count */
96c03c5b1cSMartin Matuska 
97c03c5b1cSMartin Matuska     while (nbCompares-- && (matchIndex > windowLow)) {
98c03c5b1cSMartin Matuska         U32* const nextPtr = bt + 2*(matchIndex & btMask);
99c03c5b1cSMartin Matuska         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
100c03c5b1cSMartin Matuska         assert(matchIndex < current);
101c03c5b1cSMartin Matuska         /* note : all candidates are now supposed sorted,
102c03c5b1cSMartin Matuska          * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
103c03c5b1cSMartin Matuska          * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
104c03c5b1cSMartin Matuska 
105c03c5b1cSMartin Matuska         if ( (dictMode != ZSTD_extDict)
106c03c5b1cSMartin Matuska           || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
107c03c5b1cSMartin Matuska           || (current < dictLimit) /* both in extDict */) {
108c03c5b1cSMartin Matuska             const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
109c03c5b1cSMartin Matuska                                      || (matchIndex+matchLength >= dictLimit)) ?
110c03c5b1cSMartin Matuska                                         base : dictBase;
111c03c5b1cSMartin Matuska             assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
112c03c5b1cSMartin Matuska                  || (current < dictLimit) );
113c03c5b1cSMartin Matuska             match = mBase + matchIndex;
114c03c5b1cSMartin Matuska             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
115c03c5b1cSMartin Matuska         } else {
116c03c5b1cSMartin Matuska             match = dictBase + matchIndex;
117c03c5b1cSMartin Matuska             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
118c03c5b1cSMartin Matuska             if (matchIndex+matchLength >= dictLimit)
119c03c5b1cSMartin Matuska                 match = base + matchIndex;   /* preparation for next read of match[matchLength] */
120c03c5b1cSMartin Matuska         }
121c03c5b1cSMartin Matuska 
122c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
123c03c5b1cSMartin Matuska                     current, matchIndex, (U32)matchLength);
124c03c5b1cSMartin Matuska 
125c03c5b1cSMartin Matuska         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
126c03c5b1cSMartin Matuska             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
127c03c5b1cSMartin Matuska         }
128c03c5b1cSMartin Matuska 
129c03c5b1cSMartin Matuska         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
130c03c5b1cSMartin Matuska             /* match is smaller than current */
131c03c5b1cSMartin Matuska             *smallerPtr = matchIndex;             /* update smaller idx */
132c03c5b1cSMartin Matuska             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
133c03c5b1cSMartin Matuska             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
134c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
135c03c5b1cSMartin Matuska                         matchIndex, btLow, nextPtr[1]);
136c03c5b1cSMartin Matuska             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
137c03c5b1cSMartin Matuska             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
138c03c5b1cSMartin Matuska         } else {
139c03c5b1cSMartin Matuska             /* match is larger than current */
140c03c5b1cSMartin Matuska             *largerPtr = matchIndex;
141c03c5b1cSMartin Matuska             commonLengthLarger = matchLength;
142c03c5b1cSMartin Matuska             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
143c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
144c03c5b1cSMartin Matuska                         matchIndex, btLow, nextPtr[0]);
145c03c5b1cSMartin Matuska             largerPtr = nextPtr;
146c03c5b1cSMartin Matuska             matchIndex = nextPtr[0];
147c03c5b1cSMartin Matuska     }   }
148c03c5b1cSMartin Matuska 
149c03c5b1cSMartin Matuska     *smallerPtr = *largerPtr = 0;
150c03c5b1cSMartin Matuska }
151c03c5b1cSMartin Matuska 
152c03c5b1cSMartin Matuska 
153c03c5b1cSMartin Matuska static size_t
ZSTD_DUBT_findBetterDictMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,size_t bestLength,U32 nbCompares,U32 const mls,const ZSTD_dictMode_e dictMode)154c03c5b1cSMartin Matuska ZSTD_DUBT_findBetterDictMatch (
155c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms,
156c03c5b1cSMartin Matuska         const BYTE* const ip, const BYTE* const iend,
157c03c5b1cSMartin Matuska         size_t* offsetPtr,
158c03c5b1cSMartin Matuska         size_t bestLength,
159c03c5b1cSMartin Matuska         U32 nbCompares,
160c03c5b1cSMartin Matuska         U32 const mls,
161c03c5b1cSMartin Matuska         const ZSTD_dictMode_e dictMode)
162c03c5b1cSMartin Matuska {
163c03c5b1cSMartin Matuska     const ZSTD_matchState_t * const dms = ms->dictMatchState;
164c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
165c03c5b1cSMartin Matuska     const U32 * const dictHashTable = dms->hashTable;
166c03c5b1cSMartin Matuska     U32         const hashLog = dmsCParams->hashLog;
167c03c5b1cSMartin Matuska     size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
168c03c5b1cSMartin Matuska     U32               dictMatchIndex = dictHashTable[h];
169c03c5b1cSMartin Matuska 
170c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
171c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + ms->window.dictLimit;
172c03c5b1cSMartin Matuska     U32         const current = (U32)(ip-base);
173c03c5b1cSMartin Matuska     const BYTE* const dictBase = dms->window.base;
174c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dms->window.nextSrc;
175c03c5b1cSMartin Matuska     U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
176c03c5b1cSMartin Matuska     U32         const dictLowLimit = dms->window.lowLimit;
177c03c5b1cSMartin Matuska     U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
178c03c5b1cSMartin Matuska 
179c03c5b1cSMartin Matuska     U32*        const dictBt = dms->chainTable;
180c03c5b1cSMartin Matuska     U32         const btLog  = dmsCParams->chainLog - 1;
181c03c5b1cSMartin Matuska     U32         const btMask = (1 << btLog) - 1;
182c03c5b1cSMartin Matuska     U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
183c03c5b1cSMartin Matuska 
184c03c5b1cSMartin Matuska     size_t commonLengthSmaller=0, commonLengthLarger=0;
185c03c5b1cSMartin Matuska 
186c03c5b1cSMartin Matuska     (void)dictMode;
187c03c5b1cSMartin Matuska     assert(dictMode == ZSTD_dictMatchState);
188c03c5b1cSMartin Matuska 
189c03c5b1cSMartin Matuska     while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
190c03c5b1cSMartin Matuska         U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
191c03c5b1cSMartin Matuska         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
192c03c5b1cSMartin Matuska         const BYTE* match = dictBase + dictMatchIndex;
193c03c5b1cSMartin Matuska         matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
194c03c5b1cSMartin Matuska         if (dictMatchIndex+matchLength >= dictHighLimit)
195c03c5b1cSMartin Matuska             match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
196c03c5b1cSMartin Matuska 
197c03c5b1cSMartin Matuska         if (matchLength > bestLength) {
198c03c5b1cSMartin Matuska             U32 matchIndex = dictMatchIndex + dictIndexDelta;
199c03c5b1cSMartin Matuska             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
200c03c5b1cSMartin Matuska                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
201c03c5b1cSMartin Matuska                     current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
202c03c5b1cSMartin Matuska                 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
203c03c5b1cSMartin Matuska             }
204c03c5b1cSMartin Matuska             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
205c03c5b1cSMartin Matuska                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
206c03c5b1cSMartin Matuska             }
207c03c5b1cSMartin Matuska         }
208c03c5b1cSMartin Matuska 
209c03c5b1cSMartin Matuska         if (match[matchLength] < ip[matchLength]) {
210c03c5b1cSMartin Matuska             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
211c03c5b1cSMartin Matuska             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
212c03c5b1cSMartin Matuska             dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
213c03c5b1cSMartin Matuska         } else {
214c03c5b1cSMartin Matuska             /* match is larger than current */
215c03c5b1cSMartin Matuska             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
216c03c5b1cSMartin Matuska             commonLengthLarger = matchLength;
217c03c5b1cSMartin Matuska             dictMatchIndex = nextPtr[0];
218c03c5b1cSMartin Matuska         }
219c03c5b1cSMartin Matuska     }
220c03c5b1cSMartin Matuska 
221c03c5b1cSMartin Matuska     if (bestLength >= MINMATCH) {
222c03c5b1cSMartin Matuska         U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
223c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
224c03c5b1cSMartin Matuska                     current, (U32)bestLength, (U32)*offsetPtr, mIndex);
225c03c5b1cSMartin Matuska     }
226c03c5b1cSMartin Matuska     return bestLength;
227c03c5b1cSMartin Matuska 
228c03c5b1cSMartin Matuska }
229c03c5b1cSMartin Matuska 
230c03c5b1cSMartin Matuska 
231c03c5b1cSMartin Matuska static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,U32 const mls,const ZSTD_dictMode_e dictMode)232c03c5b1cSMartin Matuska ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
233c03c5b1cSMartin Matuska                         const BYTE* const ip, const BYTE* const iend,
234c03c5b1cSMartin Matuska                         size_t* offsetPtr,
235c03c5b1cSMartin Matuska                         U32 const mls,
236c03c5b1cSMartin Matuska                         const ZSTD_dictMode_e dictMode)
237c03c5b1cSMartin Matuska {
238c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
239c03c5b1cSMartin Matuska     U32*   const hashTable = ms->hashTable;
240c03c5b1cSMartin Matuska     U32    const hashLog = cParams->hashLog;
241c03c5b1cSMartin Matuska     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
242c03c5b1cSMartin Matuska     U32          matchIndex  = hashTable[h];
243c03c5b1cSMartin Matuska 
244c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
245c03c5b1cSMartin Matuska     U32    const current = (U32)(ip-base);
246c03c5b1cSMartin Matuska     U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
247c03c5b1cSMartin Matuska 
248c03c5b1cSMartin Matuska     U32*   const bt = ms->chainTable;
249c03c5b1cSMartin Matuska     U32    const btLog  = cParams->chainLog - 1;
250c03c5b1cSMartin Matuska     U32    const btMask = (1 << btLog) - 1;
251c03c5b1cSMartin Matuska     U32    const btLow = (btMask >= current) ? 0 : current - btMask;
252c03c5b1cSMartin Matuska     U32    const unsortLimit = MAX(btLow, windowLow);
253c03c5b1cSMartin Matuska 
254c03c5b1cSMartin Matuska     U32*         nextCandidate = bt + 2*(matchIndex&btMask);
255c03c5b1cSMartin Matuska     U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
256c03c5b1cSMartin Matuska     U32          nbCompares = 1U << cParams->searchLog;
257c03c5b1cSMartin Matuska     U32          nbCandidates = nbCompares;
258c03c5b1cSMartin Matuska     U32          previousCandidate = 0;
259c03c5b1cSMartin Matuska 
260c03c5b1cSMartin Matuska     DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
261c03c5b1cSMartin Matuska     assert(ip <= iend-8);   /* required for h calculation */
262c03c5b1cSMartin Matuska 
263c03c5b1cSMartin Matuska     /* reach end of unsorted candidates list */
264c03c5b1cSMartin Matuska     while ( (matchIndex > unsortLimit)
265c03c5b1cSMartin Matuska          && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
266c03c5b1cSMartin Matuska          && (nbCandidates > 1) ) {
267c03c5b1cSMartin Matuska         DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
268c03c5b1cSMartin Matuska                     matchIndex);
269c03c5b1cSMartin Matuska         *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
270c03c5b1cSMartin Matuska         previousCandidate = matchIndex;
271c03c5b1cSMartin Matuska         matchIndex = *nextCandidate;
272c03c5b1cSMartin Matuska         nextCandidate = bt + 2*(matchIndex&btMask);
273c03c5b1cSMartin Matuska         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
274c03c5b1cSMartin Matuska         nbCandidates --;
275c03c5b1cSMartin Matuska     }
276c03c5b1cSMartin Matuska 
277c03c5b1cSMartin Matuska     /* nullify last candidate if it's still unsorted
278c03c5b1cSMartin Matuska      * simplification, detrimental to compression ratio, beneficial for speed */
279c03c5b1cSMartin Matuska     if ( (matchIndex > unsortLimit)
280c03c5b1cSMartin Matuska       && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
281c03c5b1cSMartin Matuska         DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
282c03c5b1cSMartin Matuska                     matchIndex);
283c03c5b1cSMartin Matuska         *nextCandidate = *unsortedMark = 0;
284c03c5b1cSMartin Matuska     }
285c03c5b1cSMartin Matuska 
286c03c5b1cSMartin Matuska     /* batch sort stacked candidates */
287c03c5b1cSMartin Matuska     matchIndex = previousCandidate;
288c03c5b1cSMartin Matuska     while (matchIndex) {  /* will end on matchIndex == 0 */
289c03c5b1cSMartin Matuska         U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
290c03c5b1cSMartin Matuska         U32 const nextCandidateIdx = *nextCandidateIdxPtr;
291c03c5b1cSMartin Matuska         ZSTD_insertDUBT1(ms, matchIndex, iend,
292c03c5b1cSMartin Matuska                          nbCandidates, unsortLimit, dictMode);
293c03c5b1cSMartin Matuska         matchIndex = nextCandidateIdx;
294c03c5b1cSMartin Matuska         nbCandidates++;
295c03c5b1cSMartin Matuska     }
296c03c5b1cSMartin Matuska 
297c03c5b1cSMartin Matuska     /* find longest match */
298c03c5b1cSMartin Matuska     {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
299c03c5b1cSMartin Matuska         const BYTE* const dictBase = ms->window.dictBase;
300c03c5b1cSMartin Matuska         const U32 dictLimit = ms->window.dictLimit;
301c03c5b1cSMartin Matuska         const BYTE* const dictEnd = dictBase + dictLimit;
302c03c5b1cSMartin Matuska         const BYTE* const prefixStart = base + dictLimit;
303c03c5b1cSMartin Matuska         U32* smallerPtr = bt + 2*(current&btMask);
304c03c5b1cSMartin Matuska         U32* largerPtr  = bt + 2*(current&btMask) + 1;
305c03c5b1cSMartin Matuska         U32 matchEndIdx = current + 8 + 1;
306c03c5b1cSMartin Matuska         U32 dummy32;   /* to be nullified at the end */
307c03c5b1cSMartin Matuska         size_t bestLength = 0;
308c03c5b1cSMartin Matuska 
309c03c5b1cSMartin Matuska         matchIndex  = hashTable[h];
310c03c5b1cSMartin Matuska         hashTable[h] = current;   /* Update Hash Table */
311c03c5b1cSMartin Matuska 
312c03c5b1cSMartin Matuska         while (nbCompares-- && (matchIndex > windowLow)) {
313c03c5b1cSMartin Matuska             U32* const nextPtr = bt + 2*(matchIndex & btMask);
314c03c5b1cSMartin Matuska             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
315c03c5b1cSMartin Matuska             const BYTE* match;
316c03c5b1cSMartin Matuska 
317c03c5b1cSMartin Matuska             if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
318c03c5b1cSMartin Matuska                 match = base + matchIndex;
319c03c5b1cSMartin Matuska                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
320c03c5b1cSMartin Matuska             } else {
321c03c5b1cSMartin Matuska                 match = dictBase + matchIndex;
322c03c5b1cSMartin Matuska                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
323c03c5b1cSMartin Matuska                 if (matchIndex+matchLength >= dictLimit)
324c03c5b1cSMartin Matuska                     match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
325c03c5b1cSMartin Matuska             }
326c03c5b1cSMartin Matuska 
327c03c5b1cSMartin Matuska             if (matchLength > bestLength) {
328c03c5b1cSMartin Matuska                 if (matchLength > matchEndIdx - matchIndex)
329c03c5b1cSMartin Matuska                     matchEndIdx = matchIndex + (U32)matchLength;
330c03c5b1cSMartin Matuska                 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331c03c5b1cSMartin Matuska                     bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
332c03c5b1cSMartin Matuska                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
333c03c5b1cSMartin Matuska                     if (dictMode == ZSTD_dictMatchState) {
334c03c5b1cSMartin Matuska                         nbCompares = 0; /* in addition to avoiding checking any
335c03c5b1cSMartin Matuska                                          * further in this loop, make sure we
336c03c5b1cSMartin Matuska                                          * skip checking in the dictionary. */
337c03c5b1cSMartin Matuska                     }
338c03c5b1cSMartin Matuska                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
339c03c5b1cSMartin Matuska                 }
340c03c5b1cSMartin Matuska             }
341c03c5b1cSMartin Matuska 
342c03c5b1cSMartin Matuska             if (match[matchLength] < ip[matchLength]) {
343c03c5b1cSMartin Matuska                 /* match is smaller than current */
344c03c5b1cSMartin Matuska                 *smallerPtr = matchIndex;             /* update smaller idx */
345c03c5b1cSMartin Matuska                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
346c03c5b1cSMartin Matuska                 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
347c03c5b1cSMartin Matuska                 smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
348c03c5b1cSMartin Matuska                 matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
349c03c5b1cSMartin Matuska             } else {
350c03c5b1cSMartin Matuska                 /* match is larger than current */
351c03c5b1cSMartin Matuska                 *largerPtr = matchIndex;
352c03c5b1cSMartin Matuska                 commonLengthLarger = matchLength;
353c03c5b1cSMartin Matuska                 if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
354c03c5b1cSMartin Matuska                 largerPtr = nextPtr;
355c03c5b1cSMartin Matuska                 matchIndex = nextPtr[0];
356c03c5b1cSMartin Matuska         }   }
357c03c5b1cSMartin Matuska 
358c03c5b1cSMartin Matuska         *smallerPtr = *largerPtr = 0;
359c03c5b1cSMartin Matuska 
360c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState && nbCompares) {
361c03c5b1cSMartin Matuska             bestLength = ZSTD_DUBT_findBetterDictMatch(
362c03c5b1cSMartin Matuska                     ms, ip, iend,
363c03c5b1cSMartin Matuska                     offsetPtr, bestLength, nbCompares,
364c03c5b1cSMartin Matuska                     mls, dictMode);
365c03c5b1cSMartin Matuska         }
366c03c5b1cSMartin Matuska 
367c03c5b1cSMartin Matuska         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
368c03c5b1cSMartin Matuska         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
369c03c5b1cSMartin Matuska         if (bestLength >= MINMATCH) {
370c03c5b1cSMartin Matuska             U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
371c03c5b1cSMartin Matuska             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
372c03c5b1cSMartin Matuska                         current, (U32)bestLength, (U32)*offsetPtr, mIndex);
373c03c5b1cSMartin Matuska         }
374c03c5b1cSMartin Matuska         return bestLength;
375c03c5b1cSMartin Matuska     }
376c03c5b1cSMartin Matuska }
377c03c5b1cSMartin Matuska 
378c03c5b1cSMartin Matuska 
379c03c5b1cSMartin Matuska /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
380c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)381c03c5b1cSMartin Matuska ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
382c03c5b1cSMartin Matuska                 const BYTE* const ip, const BYTE* const iLimit,
383c03c5b1cSMartin Matuska                       size_t* offsetPtr,
384c03c5b1cSMartin Matuska                 const U32 mls /* template */,
385c03c5b1cSMartin Matuska                 const ZSTD_dictMode_e dictMode)
386c03c5b1cSMartin Matuska {
387c03c5b1cSMartin Matuska     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
388c03c5b1cSMartin Matuska     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
389c03c5b1cSMartin Matuska     ZSTD_updateDUBT(ms, ip, iLimit, mls);
390c03c5b1cSMartin Matuska     return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
391c03c5b1cSMartin Matuska }
392c03c5b1cSMartin Matuska 
393c03c5b1cSMartin Matuska 
394c03c5b1cSMartin Matuska static size_t
ZSTD_BtFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)395c03c5b1cSMartin Matuska ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
396c03c5b1cSMartin Matuska                             const BYTE* ip, const BYTE* const iLimit,
397c03c5b1cSMartin Matuska                                   size_t* offsetPtr)
398c03c5b1cSMartin Matuska {
399c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
400c03c5b1cSMartin Matuska     {
401c03c5b1cSMartin Matuska     default : /* includes case 3 */
402c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
403c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
404c03c5b1cSMartin Matuska     case 7 :
405c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
406c03c5b1cSMartin Matuska     }
407c03c5b1cSMartin Matuska }
408c03c5b1cSMartin Matuska 
409c03c5b1cSMartin Matuska 
ZSTD_BtFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)410c03c5b1cSMartin Matuska static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
411c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
412c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
413c03c5b1cSMartin Matuska                         size_t* offsetPtr)
414c03c5b1cSMartin Matuska {
415c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
416c03c5b1cSMartin Matuska     {
417c03c5b1cSMartin Matuska     default : /* includes case 3 */
418c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
419c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
420c03c5b1cSMartin Matuska     case 7 :
421c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
422c03c5b1cSMartin Matuska     }
423c03c5b1cSMartin Matuska }
424c03c5b1cSMartin Matuska 
425c03c5b1cSMartin Matuska 
ZSTD_BtFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)426c03c5b1cSMartin Matuska static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
427c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
428c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
429c03c5b1cSMartin Matuska                         size_t* offsetPtr)
430c03c5b1cSMartin Matuska {
431c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
432c03c5b1cSMartin Matuska     {
433c03c5b1cSMartin Matuska     default : /* includes case 3 */
434c03c5b1cSMartin Matuska     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
435c03c5b1cSMartin Matuska     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
436c03c5b1cSMartin Matuska     case 7 :
437c03c5b1cSMartin Matuska     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
438c03c5b1cSMartin Matuska     }
439c03c5b1cSMartin Matuska }
440c03c5b1cSMartin Matuska 
441c03c5b1cSMartin Matuska 
442c03c5b1cSMartin Matuska 
443c03c5b1cSMartin Matuska /* *********************************
444c03c5b1cSMartin Matuska *  Hash Chain
445c03c5b1cSMartin Matuska ***********************************/
446c03c5b1cSMartin Matuska #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
447c03c5b1cSMartin Matuska 
448c03c5b1cSMartin Matuska /* Update chains up to ip (excluded)
449c03c5b1cSMartin Matuska    Assumption : always within prefix (i.e. not within extDict) */
ZSTD_insertAndFindFirstIndex_internal(ZSTD_matchState_t * ms,const ZSTD_compressionParameters * const cParams,const BYTE * ip,U32 const mls)450c03c5b1cSMartin Matuska static U32 ZSTD_insertAndFindFirstIndex_internal(
451c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
452c03c5b1cSMartin Matuska                         const ZSTD_compressionParameters* const cParams,
453c03c5b1cSMartin Matuska                         const BYTE* ip, U32 const mls)
454c03c5b1cSMartin Matuska {
455c03c5b1cSMartin Matuska     U32* const hashTable  = ms->hashTable;
456c03c5b1cSMartin Matuska     const U32 hashLog = cParams->hashLog;
457c03c5b1cSMartin Matuska     U32* const chainTable = ms->chainTable;
458c03c5b1cSMartin Matuska     const U32 chainMask = (1 << cParams->chainLog) - 1;
459c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
460c03c5b1cSMartin Matuska     const U32 target = (U32)(ip - base);
461c03c5b1cSMartin Matuska     U32 idx = ms->nextToUpdate;
462c03c5b1cSMartin Matuska 
463c03c5b1cSMartin Matuska     while(idx < target) { /* catch up */
464c03c5b1cSMartin Matuska         size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
465c03c5b1cSMartin Matuska         NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
466c03c5b1cSMartin Matuska         hashTable[h] = idx;
467c03c5b1cSMartin Matuska         idx++;
468c03c5b1cSMartin Matuska     }
469c03c5b1cSMartin Matuska 
470c03c5b1cSMartin Matuska     ms->nextToUpdate = target;
471c03c5b1cSMartin Matuska     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
472c03c5b1cSMartin Matuska }
473c03c5b1cSMartin Matuska 
ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t * ms,const BYTE * ip)474c03c5b1cSMartin Matuska U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
475c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
476c03c5b1cSMartin Matuska     return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
477c03c5b1cSMartin Matuska }
478c03c5b1cSMartin Matuska 
479c03c5b1cSMartin Matuska 
480c03c5b1cSMartin Matuska /* inlining is important to hardwire a hot branch (template emulation) */
481c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE
ZSTD_HcFindBestMatch_generic(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)482c03c5b1cSMartin Matuska size_t ZSTD_HcFindBestMatch_generic (
483c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
484c03c5b1cSMartin Matuska                         const BYTE* const ip, const BYTE* const iLimit,
485c03c5b1cSMartin Matuska                         size_t* offsetPtr,
486c03c5b1cSMartin Matuska                         const U32 mls, const ZSTD_dictMode_e dictMode)
487c03c5b1cSMartin Matuska {
488c03c5b1cSMartin Matuska     const ZSTD_compressionParameters* const cParams = &ms->cParams;
489c03c5b1cSMartin Matuska     U32* const chainTable = ms->chainTable;
490c03c5b1cSMartin Matuska     const U32 chainSize = (1 << cParams->chainLog);
491c03c5b1cSMartin Matuska     const U32 chainMask = chainSize-1;
492c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
493c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
494c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
495c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
496c03c5b1cSMartin Matuska     const BYTE* const dictEnd = dictBase + dictLimit;
497c03c5b1cSMartin Matuska     const U32 current = (U32)(ip-base);
498c03c5b1cSMartin Matuska     const U32 maxDistance = 1U << cParams->windowLog;
499c03c5b1cSMartin Matuska     const U32 lowestValid = ms->window.lowLimit;
500c03c5b1cSMartin Matuska     const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
501c03c5b1cSMartin Matuska     const U32 isDictionary = (ms->loadedDictEnd != 0);
502c03c5b1cSMartin Matuska     const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
503c03c5b1cSMartin Matuska     const U32 minChain = current > chainSize ? current - chainSize : 0;
504c03c5b1cSMartin Matuska     U32 nbAttempts = 1U << cParams->searchLog;
505c03c5b1cSMartin Matuska     size_t ml=4-1;
506c03c5b1cSMartin Matuska 
507c03c5b1cSMartin Matuska     /* HC4 match finder */
508c03c5b1cSMartin Matuska     U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
509c03c5b1cSMartin Matuska 
510c03c5b1cSMartin Matuska     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
511c03c5b1cSMartin Matuska         size_t currentMl=0;
512c03c5b1cSMartin Matuska         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
513c03c5b1cSMartin Matuska             const BYTE* const match = base + matchIndex;
514c03c5b1cSMartin Matuska             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
515c03c5b1cSMartin Matuska             if (match[ml] == ip[ml])   /* potentially better */
516c03c5b1cSMartin Matuska                 currentMl = ZSTD_count(ip, match, iLimit);
517c03c5b1cSMartin Matuska         } else {
518c03c5b1cSMartin Matuska             const BYTE* const match = dictBase + matchIndex;
519c03c5b1cSMartin Matuska             assert(match+4 <= dictEnd);
520c03c5b1cSMartin Matuska             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
521c03c5b1cSMartin Matuska                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
522c03c5b1cSMartin Matuska         }
523c03c5b1cSMartin Matuska 
524c03c5b1cSMartin Matuska         /* save best solution */
525c03c5b1cSMartin Matuska         if (currentMl > ml) {
526c03c5b1cSMartin Matuska             ml = currentMl;
527c03c5b1cSMartin Matuska             *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
528c03c5b1cSMartin Matuska             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
529c03c5b1cSMartin Matuska         }
530c03c5b1cSMartin Matuska 
531c03c5b1cSMartin Matuska         if (matchIndex <= minChain) break;
532c03c5b1cSMartin Matuska         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
533c03c5b1cSMartin Matuska     }
534c03c5b1cSMartin Matuska 
535c03c5b1cSMartin Matuska     if (dictMode == ZSTD_dictMatchState) {
536c03c5b1cSMartin Matuska         const ZSTD_matchState_t* const dms = ms->dictMatchState;
537c03c5b1cSMartin Matuska         const U32* const dmsChainTable = dms->chainTable;
538c03c5b1cSMartin Matuska         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
539c03c5b1cSMartin Matuska         const U32 dmsChainMask         = dmsChainSize - 1;
540c03c5b1cSMartin Matuska         const U32 dmsLowestIndex       = dms->window.dictLimit;
541c03c5b1cSMartin Matuska         const BYTE* const dmsBase      = dms->window.base;
542c03c5b1cSMartin Matuska         const BYTE* const dmsEnd       = dms->window.nextSrc;
543c03c5b1cSMartin Matuska         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
544c03c5b1cSMartin Matuska         const U32 dmsIndexDelta        = dictLimit - dmsSize;
545c03c5b1cSMartin Matuska         const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
546c03c5b1cSMartin Matuska 
547c03c5b1cSMartin Matuska         matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
548c03c5b1cSMartin Matuska 
549c03c5b1cSMartin Matuska         for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
550c03c5b1cSMartin Matuska             size_t currentMl=0;
551c03c5b1cSMartin Matuska             const BYTE* const match = dmsBase + matchIndex;
552c03c5b1cSMartin Matuska             assert(match+4 <= dmsEnd);
553c03c5b1cSMartin Matuska             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
554c03c5b1cSMartin Matuska                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
555c03c5b1cSMartin Matuska 
556c03c5b1cSMartin Matuska             /* save best solution */
557c03c5b1cSMartin Matuska             if (currentMl > ml) {
558c03c5b1cSMartin Matuska                 ml = currentMl;
559c03c5b1cSMartin Matuska                 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
560c03c5b1cSMartin Matuska                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
561c03c5b1cSMartin Matuska             }
562c03c5b1cSMartin Matuska 
563c03c5b1cSMartin Matuska             if (matchIndex <= dmsMinChain) break;
564c03c5b1cSMartin Matuska             matchIndex = dmsChainTable[matchIndex & dmsChainMask];
565c03c5b1cSMartin Matuska         }
566c03c5b1cSMartin Matuska     }
567c03c5b1cSMartin Matuska 
568c03c5b1cSMartin Matuska     return ml;
569c03c5b1cSMartin Matuska }
570c03c5b1cSMartin Matuska 
571c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)572c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
573c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
574c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
575c03c5b1cSMartin Matuska                         size_t* offsetPtr)
576c03c5b1cSMartin Matuska {
577c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
578c03c5b1cSMartin Matuska     {
579c03c5b1cSMartin Matuska     default : /* includes case 3 */
580c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
581c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
582c03c5b1cSMartin Matuska     case 7 :
583c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
584c03c5b1cSMartin Matuska     }
585c03c5b1cSMartin Matuska }
586c03c5b1cSMartin Matuska 
587c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)588c03c5b1cSMartin Matuska static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
589c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
590c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
591c03c5b1cSMartin Matuska                         size_t* offsetPtr)
592c03c5b1cSMartin Matuska {
593c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
594c03c5b1cSMartin Matuska     {
595c03c5b1cSMartin Matuska     default : /* includes case 3 */
596c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
597c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
598c03c5b1cSMartin Matuska     case 7 :
599c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
600c03c5b1cSMartin Matuska     }
601c03c5b1cSMartin Matuska }
602c03c5b1cSMartin Matuska 
603c03c5b1cSMartin Matuska 
ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)604c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
605c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
606c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* const iLimit,
607c03c5b1cSMartin Matuska                         size_t* offsetPtr)
608c03c5b1cSMartin Matuska {
609c03c5b1cSMartin Matuska     switch(ms->cParams.minMatch)
610c03c5b1cSMartin Matuska     {
611c03c5b1cSMartin Matuska     default : /* includes case 3 */
612c03c5b1cSMartin Matuska     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
613c03c5b1cSMartin Matuska     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
614c03c5b1cSMartin Matuska     case 7 :
615c03c5b1cSMartin Matuska     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
616c03c5b1cSMartin Matuska     }
617c03c5b1cSMartin Matuska }
618c03c5b1cSMartin Matuska 
619c03c5b1cSMartin Matuska 
620c03c5b1cSMartin Matuska /* *******************************
621c03c5b1cSMartin Matuska *  Common parser - lazy strategy
622c03c5b1cSMartin Matuska *********************************/
623c03c5b1cSMartin Matuska typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
624c03c5b1cSMartin Matuska 
625c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth,ZSTD_dictMode_e const dictMode)626c03c5b1cSMartin Matuska ZSTD_compressBlock_lazy_generic(
627c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
628c03c5b1cSMartin Matuska                         U32 rep[ZSTD_REP_NUM],
629c03c5b1cSMartin Matuska                         const void* src, size_t srcSize,
630c03c5b1cSMartin Matuska                         const searchMethod_e searchMethod, const U32 depth,
631c03c5b1cSMartin Matuska                         ZSTD_dictMode_e const dictMode)
632c03c5b1cSMartin Matuska {
633c03c5b1cSMartin Matuska     const BYTE* const istart = (const BYTE*)src;
634c03c5b1cSMartin Matuska     const BYTE* ip = istart;
635c03c5b1cSMartin Matuska     const BYTE* anchor = istart;
636c03c5b1cSMartin Matuska     const BYTE* const iend = istart + srcSize;
637c03c5b1cSMartin Matuska     const BYTE* const ilimit = iend - 8;
638c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
639c03c5b1cSMartin Matuska     const U32 prefixLowestIndex = ms->window.dictLimit;
640c03c5b1cSMartin Matuska     const BYTE* const prefixLowest = base + prefixLowestIndex;
641c03c5b1cSMartin Matuska 
642c03c5b1cSMartin Matuska     typedef size_t (*searchMax_f)(
643c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
644c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
645c03c5b1cSMartin Matuska     searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
646c03c5b1cSMartin Matuska         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
647c03c5b1cSMartin Matuska                                          : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
648c03c5b1cSMartin Matuska         (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
649c03c5b1cSMartin Matuska                                          : ZSTD_HcFindBestMatch_selectMLS);
650c03c5b1cSMartin Matuska     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
651c03c5b1cSMartin Matuska 
652c03c5b1cSMartin Matuska     const ZSTD_matchState_t* const dms = ms->dictMatchState;
653c03c5b1cSMartin Matuska     const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
654c03c5b1cSMartin Matuska                                      dms->window.dictLimit : 0;
655c03c5b1cSMartin Matuska     const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
656c03c5b1cSMartin Matuska                                      dms->window.base : NULL;
657c03c5b1cSMartin Matuska     const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
658c03c5b1cSMartin Matuska                                      dictBase + dictLowestIndex : NULL;
659c03c5b1cSMartin Matuska     const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
660c03c5b1cSMartin Matuska                                      dms->window.nextSrc : NULL;
661c03c5b1cSMartin Matuska     const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
662c03c5b1cSMartin Matuska                                      prefixLowestIndex - (U32)(dictEnd - dictBase) :
663c03c5b1cSMartin Matuska                                      0;
664c03c5b1cSMartin Matuska     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
665c03c5b1cSMartin Matuska 
666c03c5b1cSMartin Matuska     DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
667c03c5b1cSMartin Matuska 
668c03c5b1cSMartin Matuska     /* init */
669c03c5b1cSMartin Matuska     ip += (dictAndPrefixLength == 0);
670c03c5b1cSMartin Matuska     if (dictMode == ZSTD_noDict) {
671c03c5b1cSMartin Matuska         U32 const current = (U32)(ip - base);
672c03c5b1cSMartin Matuska         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
673c03c5b1cSMartin Matuska         U32 const maxRep = current - windowLow;
674c03c5b1cSMartin Matuska         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
675c03c5b1cSMartin Matuska         if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
676c03c5b1cSMartin Matuska     }
677c03c5b1cSMartin Matuska     if (dictMode == ZSTD_dictMatchState) {
678c03c5b1cSMartin Matuska         /* dictMatchState repCode checks don't currently handle repCode == 0
679c03c5b1cSMartin Matuska          * disabling. */
680c03c5b1cSMartin Matuska         assert(offset_1 <= dictAndPrefixLength);
681c03c5b1cSMartin Matuska         assert(offset_2 <= dictAndPrefixLength);
682c03c5b1cSMartin Matuska     }
683c03c5b1cSMartin Matuska 
684c03c5b1cSMartin Matuska     /* Match Loop */
685c03c5b1cSMartin Matuska #if defined(__GNUC__) && defined(__x86_64__)
686c03c5b1cSMartin Matuska     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
687c03c5b1cSMartin Matuska      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
688c03c5b1cSMartin Matuska      */
689c03c5b1cSMartin Matuska     __asm__(".p2align 5");
690c03c5b1cSMartin Matuska #endif
691c03c5b1cSMartin Matuska     while (ip < ilimit) {
692c03c5b1cSMartin Matuska         size_t matchLength=0;
693c03c5b1cSMartin Matuska         size_t offset=0;
694c03c5b1cSMartin Matuska         const BYTE* start=ip+1;
695c03c5b1cSMartin Matuska 
696c03c5b1cSMartin Matuska         /* check repCode */
697c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState) {
698c03c5b1cSMartin Matuska             const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
699c03c5b1cSMartin Matuska             const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
700c03c5b1cSMartin Matuska                                 && repIndex < prefixLowestIndex) ?
701c03c5b1cSMartin Matuska                                    dictBase + (repIndex - dictIndexDelta) :
702c03c5b1cSMartin Matuska                                    base + repIndex;
703c03c5b1cSMartin Matuska             if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
704c03c5b1cSMartin Matuska                 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
705c03c5b1cSMartin Matuska                 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
706c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
707c03c5b1cSMartin Matuska                 if (depth==0) goto _storeSequence;
708c03c5b1cSMartin Matuska             }
709c03c5b1cSMartin Matuska         }
710c03c5b1cSMartin Matuska         if ( dictMode == ZSTD_noDict
711c03c5b1cSMartin Matuska           && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
712c03c5b1cSMartin Matuska             matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
713c03c5b1cSMartin Matuska             if (depth==0) goto _storeSequence;
714c03c5b1cSMartin Matuska         }
715c03c5b1cSMartin Matuska 
716c03c5b1cSMartin Matuska         /* first search (depth 0) */
717c03c5b1cSMartin Matuska         {   size_t offsetFound = 999999999;
718c03c5b1cSMartin Matuska             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
719c03c5b1cSMartin Matuska             if (ml2 > matchLength)
720c03c5b1cSMartin Matuska                 matchLength = ml2, start = ip, offset=offsetFound;
721c03c5b1cSMartin Matuska         }
722c03c5b1cSMartin Matuska 
723c03c5b1cSMartin Matuska         if (matchLength < 4) {
724c03c5b1cSMartin Matuska             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
725c03c5b1cSMartin Matuska             continue;
726c03c5b1cSMartin Matuska         }
727c03c5b1cSMartin Matuska 
728c03c5b1cSMartin Matuska         /* let's try to find a better solution */
729c03c5b1cSMartin Matuska         if (depth>=1)
730c03c5b1cSMartin Matuska         while (ip<ilimit) {
731c03c5b1cSMartin Matuska             ip ++;
732c03c5b1cSMartin Matuska             if ( (dictMode == ZSTD_noDict)
733c03c5b1cSMartin Matuska               && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
734c03c5b1cSMartin Matuska                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
735c03c5b1cSMartin Matuska                 int const gain2 = (int)(mlRep * 3);
736c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
737c03c5b1cSMartin Matuska                 if ((mlRep >= 4) && (gain2 > gain1))
738c03c5b1cSMartin Matuska                     matchLength = mlRep, offset = 0, start = ip;
739c03c5b1cSMartin Matuska             }
740c03c5b1cSMartin Matuska             if (dictMode == ZSTD_dictMatchState) {
741c03c5b1cSMartin Matuska                 const U32 repIndex = (U32)(ip - base) - offset_1;
742c03c5b1cSMartin Matuska                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
743c03c5b1cSMartin Matuska                                dictBase + (repIndex - dictIndexDelta) :
744c03c5b1cSMartin Matuska                                base + repIndex;
745c03c5b1cSMartin Matuska                 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
746c03c5b1cSMartin Matuska                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
747c03c5b1cSMartin Matuska                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
748c03c5b1cSMartin Matuska                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
749c03c5b1cSMartin Matuska                     int const gain2 = (int)(mlRep * 3);
750c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
751c03c5b1cSMartin Matuska                     if ((mlRep >= 4) && (gain2 > gain1))
752c03c5b1cSMartin Matuska                         matchLength = mlRep, offset = 0, start = ip;
753c03c5b1cSMartin Matuska                 }
754c03c5b1cSMartin Matuska             }
755c03c5b1cSMartin Matuska             {   size_t offset2=999999999;
756c03c5b1cSMartin Matuska                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
757c03c5b1cSMartin Matuska                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
758c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
759c03c5b1cSMartin Matuska                 if ((ml2 >= 4) && (gain2 > gain1)) {
760c03c5b1cSMartin Matuska                     matchLength = ml2, offset = offset2, start = ip;
761c03c5b1cSMartin Matuska                     continue;   /* search a better one */
762c03c5b1cSMartin Matuska             }   }
763c03c5b1cSMartin Matuska 
764c03c5b1cSMartin Matuska             /* let's find an even better one */
765c03c5b1cSMartin Matuska             if ((depth==2) && (ip<ilimit)) {
766c03c5b1cSMartin Matuska                 ip ++;
767c03c5b1cSMartin Matuska                 if ( (dictMode == ZSTD_noDict)
768c03c5b1cSMartin Matuska                   && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
769c03c5b1cSMartin Matuska                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
770c03c5b1cSMartin Matuska                     int const gain2 = (int)(mlRep * 4);
771c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
772c03c5b1cSMartin Matuska                     if ((mlRep >= 4) && (gain2 > gain1))
773c03c5b1cSMartin Matuska                         matchLength = mlRep, offset = 0, start = ip;
774c03c5b1cSMartin Matuska                 }
775c03c5b1cSMartin Matuska                 if (dictMode == ZSTD_dictMatchState) {
776c03c5b1cSMartin Matuska                     const U32 repIndex = (U32)(ip - base) - offset_1;
777c03c5b1cSMartin Matuska                     const BYTE* repMatch = repIndex < prefixLowestIndex ?
778c03c5b1cSMartin Matuska                                    dictBase + (repIndex - dictIndexDelta) :
779c03c5b1cSMartin Matuska                                    base + repIndex;
780c03c5b1cSMartin Matuska                     if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
781c03c5b1cSMartin Matuska                         && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
782c03c5b1cSMartin Matuska                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
783c03c5b1cSMartin Matuska                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
784c03c5b1cSMartin Matuska                         int const gain2 = (int)(mlRep * 4);
785c03c5b1cSMartin Matuska                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
786c03c5b1cSMartin Matuska                         if ((mlRep >= 4) && (gain2 > gain1))
787c03c5b1cSMartin Matuska                             matchLength = mlRep, offset = 0, start = ip;
788c03c5b1cSMartin Matuska                     }
789c03c5b1cSMartin Matuska                 }
790c03c5b1cSMartin Matuska                 {   size_t offset2=999999999;
791c03c5b1cSMartin Matuska                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
792c03c5b1cSMartin Matuska                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
793c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
794c03c5b1cSMartin Matuska                     if ((ml2 >= 4) && (gain2 > gain1)) {
795c03c5b1cSMartin Matuska                         matchLength = ml2, offset = offset2, start = ip;
796c03c5b1cSMartin Matuska                         continue;
797c03c5b1cSMartin Matuska             }   }   }
798c03c5b1cSMartin Matuska             break;  /* nothing found : store previous solution */
799c03c5b1cSMartin Matuska         }
800c03c5b1cSMartin Matuska 
801c03c5b1cSMartin Matuska         /* NOTE:
802c03c5b1cSMartin Matuska          * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
803c03c5b1cSMartin Matuska          * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
804c03c5b1cSMartin Matuska          * overflows the pointer, which is undefined behavior.
805c03c5b1cSMartin Matuska          */
806c03c5b1cSMartin Matuska         /* catch up */
807c03c5b1cSMartin Matuska         if (offset) {
808c03c5b1cSMartin Matuska             if (dictMode == ZSTD_noDict) {
809c03c5b1cSMartin Matuska                 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
810c03c5b1cSMartin Matuska                      && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
811c03c5b1cSMartin Matuska                     { start--; matchLength++; }
812c03c5b1cSMartin Matuska             }
813c03c5b1cSMartin Matuska             if (dictMode == ZSTD_dictMatchState) {
814c03c5b1cSMartin Matuska                 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
815c03c5b1cSMartin Matuska                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
816c03c5b1cSMartin Matuska                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
817c03c5b1cSMartin Matuska                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
818c03c5b1cSMartin Matuska             }
819c03c5b1cSMartin Matuska             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
820c03c5b1cSMartin Matuska         }
821c03c5b1cSMartin Matuska         /* store sequence */
822c03c5b1cSMartin Matuska _storeSequence:
823c03c5b1cSMartin Matuska         {   size_t const litLength = start - anchor;
824c03c5b1cSMartin Matuska             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
825c03c5b1cSMartin Matuska             anchor = ip = start + matchLength;
826c03c5b1cSMartin Matuska         }
827c03c5b1cSMartin Matuska 
828c03c5b1cSMartin Matuska         /* check immediate repcode */
829c03c5b1cSMartin Matuska         if (dictMode == ZSTD_dictMatchState) {
830c03c5b1cSMartin Matuska             while (ip <= ilimit) {
831c03c5b1cSMartin Matuska                 U32 const current2 = (U32)(ip-base);
832c03c5b1cSMartin Matuska                 U32 const repIndex = current2 - offset_2;
833c03c5b1cSMartin Matuska                 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
834c03c5b1cSMartin Matuska                     && repIndex < prefixLowestIndex ?
835c03c5b1cSMartin Matuska                         dictBase - dictIndexDelta + repIndex :
836c03c5b1cSMartin Matuska                         base + repIndex;
837c03c5b1cSMartin Matuska                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
838c03c5b1cSMartin Matuska                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
839c03c5b1cSMartin Matuska                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
840c03c5b1cSMartin Matuska                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
841c03c5b1cSMartin Matuska                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
842c03c5b1cSMartin Matuska                     ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
843c03c5b1cSMartin Matuska                     ip += matchLength;
844c03c5b1cSMartin Matuska                     anchor = ip;
845c03c5b1cSMartin Matuska                     continue;
846c03c5b1cSMartin Matuska                 }
847c03c5b1cSMartin Matuska                 break;
848c03c5b1cSMartin Matuska             }
849c03c5b1cSMartin Matuska         }
850c03c5b1cSMartin Matuska 
851c03c5b1cSMartin Matuska         if (dictMode == ZSTD_noDict) {
852c03c5b1cSMartin Matuska             while ( ((ip <= ilimit) & (offset_2>0))
853c03c5b1cSMartin Matuska                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
854c03c5b1cSMartin Matuska                 /* store sequence */
855c03c5b1cSMartin Matuska                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
856c03c5b1cSMartin Matuska                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
857c03c5b1cSMartin Matuska                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
858c03c5b1cSMartin Matuska                 ip += matchLength;
859c03c5b1cSMartin Matuska                 anchor = ip;
860c03c5b1cSMartin Matuska                 continue;   /* faster when present ... (?) */
861c03c5b1cSMartin Matuska     }   }   }
862c03c5b1cSMartin Matuska 
863c03c5b1cSMartin Matuska     /* Save reps for next block */
864c03c5b1cSMartin Matuska     rep[0] = offset_1 ? offset_1 : savedOffset;
865c03c5b1cSMartin Matuska     rep[1] = offset_2 ? offset_2 : savedOffset;
866c03c5b1cSMartin Matuska 
867c03c5b1cSMartin Matuska     /* Return the last literals size */
868c03c5b1cSMartin Matuska     return (size_t)(iend - anchor);
869c03c5b1cSMartin Matuska }
870c03c5b1cSMartin Matuska 
871c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)872c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2(
873c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
874c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
875c03c5b1cSMartin Matuska {
876c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
877c03c5b1cSMartin Matuska }
878c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)879c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2(
880c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
881c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
882c03c5b1cSMartin Matuska {
883c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
884c03c5b1cSMartin Matuska }
885c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)886c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy(
887c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
888c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
889c03c5b1cSMartin Matuska {
890c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
891c03c5b1cSMartin Matuska }
892c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)893c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy(
894c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
895c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
896c03c5b1cSMartin Matuska {
897c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
898c03c5b1cSMartin Matuska }
899c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)900c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2_dictMatchState(
901c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
902c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
903c03c5b1cSMartin Matuska {
904c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
905c03c5b1cSMartin Matuska }
906c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)907c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2_dictMatchState(
908c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
909c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
910c03c5b1cSMartin Matuska {
911c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
912c03c5b1cSMartin Matuska }
913c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)914c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_dictMatchState(
915c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
916c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
917c03c5b1cSMartin Matuska {
918c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
919c03c5b1cSMartin Matuska }
920c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)921c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy_dictMatchState(
922c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
923c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
924c03c5b1cSMartin Matuska {
925c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
926c03c5b1cSMartin Matuska }
927c03c5b1cSMartin Matuska 
928c03c5b1cSMartin Matuska 
929c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_lazy_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth)930c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_extDict_generic(
931c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
932c03c5b1cSMartin Matuska                         U32 rep[ZSTD_REP_NUM],
933c03c5b1cSMartin Matuska                         const void* src, size_t srcSize,
934c03c5b1cSMartin Matuska                         const searchMethod_e searchMethod, const U32 depth)
935c03c5b1cSMartin Matuska {
936c03c5b1cSMartin Matuska     const BYTE* const istart = (const BYTE*)src;
937c03c5b1cSMartin Matuska     const BYTE* ip = istart;
938c03c5b1cSMartin Matuska     const BYTE* anchor = istart;
939c03c5b1cSMartin Matuska     const BYTE* const iend = istart + srcSize;
940c03c5b1cSMartin Matuska     const BYTE* const ilimit = iend - 8;
941c03c5b1cSMartin Matuska     const BYTE* const base = ms->window.base;
942c03c5b1cSMartin Matuska     const U32 dictLimit = ms->window.dictLimit;
943c03c5b1cSMartin Matuska     const BYTE* const prefixStart = base + dictLimit;
944c03c5b1cSMartin Matuska     const BYTE* const dictBase = ms->window.dictBase;
945c03c5b1cSMartin Matuska     const BYTE* const dictEnd  = dictBase + dictLimit;
946c03c5b1cSMartin Matuska     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
947c03c5b1cSMartin Matuska     const U32 windowLog = ms->cParams.windowLog;
948c03c5b1cSMartin Matuska 
949c03c5b1cSMartin Matuska     typedef size_t (*searchMax_f)(
950c03c5b1cSMartin Matuska                         ZSTD_matchState_t* ms,
951c03c5b1cSMartin Matuska                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
952c03c5b1cSMartin Matuska     searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
953c03c5b1cSMartin Matuska 
954c03c5b1cSMartin Matuska     U32 offset_1 = rep[0], offset_2 = rep[1];
955c03c5b1cSMartin Matuska 
956c03c5b1cSMartin Matuska     DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
957c03c5b1cSMartin Matuska 
958c03c5b1cSMartin Matuska     /* init */
959c03c5b1cSMartin Matuska     ip += (ip == prefixStart);
960c03c5b1cSMartin Matuska 
961c03c5b1cSMartin Matuska     /* Match Loop */
962c03c5b1cSMartin Matuska #if defined(__GNUC__) && defined(__x86_64__)
963c03c5b1cSMartin Matuska     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
964c03c5b1cSMartin Matuska      * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
965c03c5b1cSMartin Matuska      */
966c03c5b1cSMartin Matuska     __asm__(".p2align 5");
967c03c5b1cSMartin Matuska #endif
968c03c5b1cSMartin Matuska     while (ip < ilimit) {
969c03c5b1cSMartin Matuska         size_t matchLength=0;
970c03c5b1cSMartin Matuska         size_t offset=0;
971c03c5b1cSMartin Matuska         const BYTE* start=ip+1;
972c03c5b1cSMartin Matuska         U32 current = (U32)(ip-base);
973c03c5b1cSMartin Matuska 
974c03c5b1cSMartin Matuska         /* check repCode */
975c03c5b1cSMartin Matuska         {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
976c03c5b1cSMartin Matuska             const U32 repIndex = (U32)(current+1 - offset_1);
977c03c5b1cSMartin Matuska             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
978c03c5b1cSMartin Matuska             const BYTE* const repMatch = repBase + repIndex;
979c9539b89SMartin Matuska             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
980c9539b89SMartin Matuska                & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
981c03c5b1cSMartin Matuska             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
982c03c5b1cSMartin Matuska                 /* repcode detected we should take it */
983c03c5b1cSMartin Matuska                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
984c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
985c03c5b1cSMartin Matuska                 if (depth==0) goto _storeSequence;
986c03c5b1cSMartin Matuska         }   }
987c03c5b1cSMartin Matuska 
988c03c5b1cSMartin Matuska         /* first search (depth 0) */
989c03c5b1cSMartin Matuska         {   size_t offsetFound = 999999999;
990c03c5b1cSMartin Matuska             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
991c03c5b1cSMartin Matuska             if (ml2 > matchLength)
992c03c5b1cSMartin Matuska                 matchLength = ml2, start = ip, offset=offsetFound;
993c03c5b1cSMartin Matuska         }
994c03c5b1cSMartin Matuska 
995c03c5b1cSMartin Matuska          if (matchLength < 4) {
996c03c5b1cSMartin Matuska             ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
997c03c5b1cSMartin Matuska             continue;
998c03c5b1cSMartin Matuska         }
999c03c5b1cSMartin Matuska 
1000c03c5b1cSMartin Matuska         /* let's try to find a better solution */
1001c03c5b1cSMartin Matuska         if (depth>=1)
1002c03c5b1cSMartin Matuska         while (ip<ilimit) {
1003c03c5b1cSMartin Matuska             ip ++;
1004c03c5b1cSMartin Matuska             current++;
1005c03c5b1cSMartin Matuska             /* check repCode */
1006c03c5b1cSMartin Matuska             if (offset) {
1007c03c5b1cSMartin Matuska                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1008c03c5b1cSMartin Matuska                 const U32 repIndex = (U32)(current - offset_1);
1009c03c5b1cSMartin Matuska                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1010c03c5b1cSMartin Matuska                 const BYTE* const repMatch = repBase + repIndex;
1011c9539b89SMartin Matuska                     if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1012c9539b89SMartin Matuska                        & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1013c03c5b1cSMartin Matuska                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1014c03c5b1cSMartin Matuska                     /* repcode detected */
1015c03c5b1cSMartin Matuska                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1016c03c5b1cSMartin Matuska                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1017c03c5b1cSMartin Matuska                     int const gain2 = (int)(repLength * 3);
1018c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1019c03c5b1cSMartin Matuska                     if ((repLength >= 4) && (gain2 > gain1))
1020c03c5b1cSMartin Matuska                         matchLength = repLength, offset = 0, start = ip;
1021c03c5b1cSMartin Matuska             }   }
1022c03c5b1cSMartin Matuska 
1023c03c5b1cSMartin Matuska             /* search match, depth 1 */
1024c03c5b1cSMartin Matuska             {   size_t offset2=999999999;
1025c03c5b1cSMartin Matuska                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1026c03c5b1cSMartin Matuska                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
1027c03c5b1cSMartin Matuska                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1028c03c5b1cSMartin Matuska                 if ((ml2 >= 4) && (gain2 > gain1)) {
1029c03c5b1cSMartin Matuska                     matchLength = ml2, offset = offset2, start = ip;
1030c03c5b1cSMartin Matuska                     continue;   /* search a better one */
1031c03c5b1cSMartin Matuska             }   }
1032c03c5b1cSMartin Matuska 
1033c03c5b1cSMartin Matuska             /* let's find an even better one */
1034c03c5b1cSMartin Matuska             if ((depth==2) && (ip<ilimit)) {
1035c03c5b1cSMartin Matuska                 ip ++;
1036c03c5b1cSMartin Matuska                 current++;
1037c03c5b1cSMartin Matuska                 /* check repCode */
1038c03c5b1cSMartin Matuska                 if (offset) {
1039c03c5b1cSMartin Matuska                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1040c03c5b1cSMartin Matuska                     const U32 repIndex = (U32)(current - offset_1);
1041c03c5b1cSMartin Matuska                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1042c03c5b1cSMartin Matuska                     const BYTE* const repMatch = repBase + repIndex;
1043c9539b89SMartin Matuska                 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1044c9539b89SMartin Matuska                    & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1045c03c5b1cSMartin Matuska                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
1046c03c5b1cSMartin Matuska                         /* repcode detected */
1047c03c5b1cSMartin Matuska                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1048c03c5b1cSMartin Matuska                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1049c03c5b1cSMartin Matuska                         int const gain2 = (int)(repLength * 4);
1050c03c5b1cSMartin Matuska                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1051c03c5b1cSMartin Matuska                         if ((repLength >= 4) && (gain2 > gain1))
1052c03c5b1cSMartin Matuska                             matchLength = repLength, offset = 0, start = ip;
1053c03c5b1cSMartin Matuska                 }   }
1054c03c5b1cSMartin Matuska 
1055c03c5b1cSMartin Matuska                 /* search match, depth 2 */
1056c03c5b1cSMartin Matuska                 {   size_t offset2=999999999;
1057c03c5b1cSMartin Matuska                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1058c03c5b1cSMartin Matuska                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
1059c03c5b1cSMartin Matuska                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1060c03c5b1cSMartin Matuska                     if ((ml2 >= 4) && (gain2 > gain1)) {
1061c03c5b1cSMartin Matuska                         matchLength = ml2, offset = offset2, start = ip;
1062c03c5b1cSMartin Matuska                         continue;
1063c03c5b1cSMartin Matuska             }   }   }
1064c03c5b1cSMartin Matuska             break;  /* nothing found : store previous solution */
1065c03c5b1cSMartin Matuska         }
1066c03c5b1cSMartin Matuska 
1067c03c5b1cSMartin Matuska         /* catch up */
1068c03c5b1cSMartin Matuska         if (offset) {
1069c03c5b1cSMartin Matuska             U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1070c03c5b1cSMartin Matuska             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1071c03c5b1cSMartin Matuska             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1072c03c5b1cSMartin Matuska             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
1073c03c5b1cSMartin Matuska             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1074c03c5b1cSMartin Matuska         }
1075c03c5b1cSMartin Matuska 
1076c03c5b1cSMartin Matuska         /* store sequence */
1077c03c5b1cSMartin Matuska _storeSequence:
1078c03c5b1cSMartin Matuska         {   size_t const litLength = start - anchor;
1079c03c5b1cSMartin Matuska             ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1080c03c5b1cSMartin Matuska             anchor = ip = start + matchLength;
1081c03c5b1cSMartin Matuska         }
1082c03c5b1cSMartin Matuska 
1083c03c5b1cSMartin Matuska         /* check immediate repcode */
1084c03c5b1cSMartin Matuska         while (ip <= ilimit) {
1085c03c5b1cSMartin Matuska             const U32 repCurrent = (U32)(ip-base);
1086c03c5b1cSMartin Matuska             const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1087c03c5b1cSMartin Matuska             const U32 repIndex = repCurrent - offset_2;
1088c03c5b1cSMartin Matuska             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1089c03c5b1cSMartin Matuska             const BYTE* const repMatch = repBase + repIndex;
1090c9539b89SMartin Matuska             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
1091c9539b89SMartin Matuska                & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1092c03c5b1cSMartin Matuska             if (MEM_read32(ip) == MEM_read32(repMatch)) {
1093c03c5b1cSMartin Matuska                 /* repcode detected we should take it */
1094c03c5b1cSMartin Matuska                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1095c03c5b1cSMartin Matuska                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1096c03c5b1cSMartin Matuska                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
1097c03c5b1cSMartin Matuska                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1098c03c5b1cSMartin Matuska                 ip += matchLength;
1099c03c5b1cSMartin Matuska                 anchor = ip;
1100c03c5b1cSMartin Matuska                 continue;   /* faster when present ... (?) */
1101c03c5b1cSMartin Matuska             }
1102c03c5b1cSMartin Matuska             break;
1103c03c5b1cSMartin Matuska     }   }
1104c03c5b1cSMartin Matuska 
1105c03c5b1cSMartin Matuska     /* Save reps for next block */
1106c03c5b1cSMartin Matuska     rep[0] = offset_1;
1107c03c5b1cSMartin Matuska     rep[1] = offset_2;
1108c03c5b1cSMartin Matuska 
1109c03c5b1cSMartin Matuska     /* Return the last literals size */
1110c03c5b1cSMartin Matuska     return (size_t)(iend - anchor);
1111c03c5b1cSMartin Matuska }
1112c03c5b1cSMartin Matuska 
1113c03c5b1cSMartin Matuska 
ZSTD_compressBlock_greedy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1114c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_greedy_extDict(
1115c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1116c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1117c03c5b1cSMartin Matuska {
1118c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1119c03c5b1cSMartin Matuska }
1120c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1121c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy_extDict(
1122c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1123c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1124c03c5b1cSMartin Matuska 
1125c03c5b1cSMartin Matuska {
1126c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1127c03c5b1cSMartin Matuska }
1128c03c5b1cSMartin Matuska 
ZSTD_compressBlock_lazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1129c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_lazy2_extDict(
1130c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1131c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1132c03c5b1cSMartin Matuska 
1133c03c5b1cSMartin Matuska {
1134c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1135c03c5b1cSMartin Matuska }
1136c03c5b1cSMartin Matuska 
ZSTD_compressBlock_btlazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1137c03c5b1cSMartin Matuska size_t ZSTD_compressBlock_btlazy2_extDict(
1138c03c5b1cSMartin Matuska         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1139c03c5b1cSMartin Matuska         void const* src, size_t srcSize)
1140c03c5b1cSMartin Matuska 
1141c03c5b1cSMartin Matuska {
1142c03c5b1cSMartin Matuska     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1143c03c5b1cSMartin Matuska }
1144