xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_opt.c (revision 052d3c129019c2f03488f7cb7399580091f9a713)
10c16b537SWarner Losh /*
20c16b537SWarner Losh  * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
30c16b537SWarner Losh  * All rights reserved.
40c16b537SWarner Losh  *
50c16b537SWarner Losh  * This source code is licensed under both the BSD-style license (found in the
60c16b537SWarner Losh  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
70c16b537SWarner Losh  * in the COPYING file in the root directory of this source tree).
80c16b537SWarner Losh  * You may select, at your option, one of the above-listed licenses.
90c16b537SWarner Losh  */
100c16b537SWarner Losh 
11*052d3c12SConrad Meyer #include "zstd_compress_internal.h"
120c16b537SWarner Losh #include "zstd_opt.h"
13*052d3c12SConrad Meyer #include "zstd_lazy.h"   /* ZSTD_updateTree, ZSTD_updateTree_extDict */
140c16b537SWarner Losh 
150c16b537SWarner Losh 
16*052d3c12SConrad Meyer #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
17*052d3c12SConrad Meyer #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
180c16b537SWarner Losh #define ZSTD_MAX_PRICE     (1<<30)
190c16b537SWarner Losh 
20*052d3c12SConrad Meyer 
210c16b537SWarner Losh /*-*************************************
220c16b537SWarner Losh *  Price functions for optimal parser
230c16b537SWarner Losh ***************************************/
240c16b537SWarner Losh static void ZSTD_setLog2Prices(optState_t* optPtr)
250c16b537SWarner Losh {
260c16b537SWarner Losh     optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
27*052d3c12SConrad Meyer     optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
28*052d3c12SConrad Meyer     optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
290c16b537SWarner Losh     optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
300c16b537SWarner Losh }
310c16b537SWarner Losh 
320c16b537SWarner Losh 
33*052d3c12SConrad Meyer static void ZSTD_rescaleFreqs(optState_t* const optPtr,
34*052d3c12SConrad Meyer                               const BYTE* const src, size_t const srcSize)
350c16b537SWarner Losh {
360c16b537SWarner Losh     optPtr->staticPrices = 0;
370c16b537SWarner Losh 
38*052d3c12SConrad Meyer     if (optPtr->litLengthSum == 0) {  /* first init */
39*052d3c12SConrad Meyer         unsigned u;
400c16b537SWarner Losh         if (srcSize <= 1024) optPtr->staticPrices = 1;
410c16b537SWarner Losh 
420c16b537SWarner Losh         assert(optPtr->litFreq!=NULL);
430c16b537SWarner Losh         for (u=0; u<=MaxLit; u++)
440c16b537SWarner Losh             optPtr->litFreq[u] = 0;
450c16b537SWarner Losh         for (u=0; u<srcSize; u++)
460c16b537SWarner Losh             optPtr->litFreq[src[u]]++;
470c16b537SWarner Losh         optPtr->litSum = 0;
480c16b537SWarner Losh         for (u=0; u<=MaxLit; u++) {
490c16b537SWarner Losh             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
500c16b537SWarner Losh             optPtr->litSum += optPtr->litFreq[u];
510c16b537SWarner Losh         }
52*052d3c12SConrad Meyer 
530c16b537SWarner Losh         for (u=0; u<=MaxLL; u++)
540c16b537SWarner Losh             optPtr->litLengthFreq[u] = 1;
55*052d3c12SConrad Meyer         optPtr->litLengthSum = MaxLL+1;
560c16b537SWarner Losh         for (u=0; u<=MaxML; u++)
570c16b537SWarner Losh             optPtr->matchLengthFreq[u] = 1;
58*052d3c12SConrad Meyer         optPtr->matchLengthSum = MaxML+1;
590c16b537SWarner Losh         for (u=0; u<=MaxOff; u++)
600c16b537SWarner Losh             optPtr->offCodeFreq[u] = 1;
61*052d3c12SConrad Meyer         optPtr->offCodeSum = (MaxOff+1);
620c16b537SWarner Losh 
63*052d3c12SConrad Meyer     } else {
64*052d3c12SConrad Meyer         unsigned u;
65*052d3c12SConrad Meyer 
66*052d3c12SConrad Meyer         optPtr->litSum = 0;
670c16b537SWarner Losh         for (u=0; u<=MaxLit; u++) {
680c16b537SWarner Losh             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
690c16b537SWarner Losh             optPtr->litSum += optPtr->litFreq[u];
700c16b537SWarner Losh         }
71*052d3c12SConrad Meyer         optPtr->litLengthSum = 0;
720c16b537SWarner Losh         for (u=0; u<=MaxLL; u++) {
730c16b537SWarner Losh             optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
740c16b537SWarner Losh             optPtr->litLengthSum += optPtr->litLengthFreq[u];
750c16b537SWarner Losh         }
76*052d3c12SConrad Meyer         optPtr->matchLengthSum = 0;
770c16b537SWarner Losh         for (u=0; u<=MaxML; u++) {
780c16b537SWarner Losh             optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
790c16b537SWarner Losh             optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
800c16b537SWarner Losh         }
81*052d3c12SConrad Meyer         optPtr->offCodeSum = 0;
820c16b537SWarner Losh         for (u=0; u<=MaxOff; u++) {
830c16b537SWarner Losh             optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
840c16b537SWarner Losh             optPtr->offCodeSum += optPtr->offCodeFreq[u];
850c16b537SWarner Losh         }
860c16b537SWarner Losh     }
870c16b537SWarner Losh 
880c16b537SWarner Losh     ZSTD_setLog2Prices(optPtr);
890c16b537SWarner Losh }
900c16b537SWarner Losh 
910c16b537SWarner Losh 
92*052d3c12SConrad Meyer /* ZSTD_rawLiteralsCost() :
93*052d3c12SConrad Meyer  * cost of literals (only) in given segment (which length can be null)
94*052d3c12SConrad Meyer  * does not include cost of literalLength symbol */
95*052d3c12SConrad Meyer static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
96*052d3c12SConrad Meyer                                 const optState_t* const optPtr)
970c16b537SWarner Losh {
98*052d3c12SConrad Meyer     if (optPtr->staticPrices) return (litLength*6);  /* 6 bit per literal - no statistic used */
99*052d3c12SConrad Meyer     if (litLength == 0) return 0;
1000c16b537SWarner Losh 
1010c16b537SWarner Losh     /* literals */
102*052d3c12SConrad Meyer     {   U32 u;
103*052d3c12SConrad Meyer         U32 cost = litLength * optPtr->log2litSum;
1040c16b537SWarner Losh         for (u=0; u < litLength; u++)
105*052d3c12SConrad Meyer             cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
106*052d3c12SConrad Meyer         return cost;
107*052d3c12SConrad Meyer     }
108*052d3c12SConrad Meyer }
1090c16b537SWarner Losh 
110*052d3c12SConrad Meyer /* ZSTD_litLengthPrice() :
111*052d3c12SConrad Meyer  * cost of literalLength symbol */
112*052d3c12SConrad Meyer static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
113*052d3c12SConrad Meyer {
114*052d3c12SConrad Meyer     if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
1150c16b537SWarner Losh 
1160c16b537SWarner Losh     /* literal Length */
117*052d3c12SConrad Meyer     {   U32 const llCode = ZSTD_LLcode(litLength);
118*052d3c12SConrad Meyer         U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
1190c16b537SWarner Losh         return price;
1200c16b537SWarner Losh     }
121*052d3c12SConrad Meyer }
1220c16b537SWarner Losh 
123*052d3c12SConrad Meyer /* ZSTD_litLengthPrice() :
124*052d3c12SConrad Meyer  * cost of the literal part of a sequence,
125*052d3c12SConrad Meyer  * including literals themselves, and literalLength symbol */
126*052d3c12SConrad Meyer static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
127*052d3c12SConrad Meyer                                  const optState_t* const optPtr)
1280c16b537SWarner Losh {
129*052d3c12SConrad Meyer     return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
130*052d3c12SConrad Meyer          + ZSTD_litLengthPrice(litLength, optPtr);
131*052d3c12SConrad Meyer }
1320c16b537SWarner Losh 
133*052d3c12SConrad Meyer /* ZSTD_litLengthContribution() :
134*052d3c12SConrad Meyer  * @return ( cost(litlength) - cost(0) )
135*052d3c12SConrad Meyer  * this value can then be added to rawLiteralsCost()
136*052d3c12SConrad Meyer  * to provide a cost which is directly comparable to a match ending at same position */
137*052d3c12SConrad Meyer static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
138*052d3c12SConrad Meyer {
139*052d3c12SConrad Meyer     if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
140*052d3c12SConrad Meyer 
141*052d3c12SConrad Meyer     /* literal Length */
142*052d3c12SConrad Meyer     {   U32 const llCode = ZSTD_LLcode(litLength);
143*052d3c12SConrad Meyer         int const contribution = LL_bits[llCode]
144*052d3c12SConrad Meyer                         + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
145*052d3c12SConrad Meyer                         - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
146*052d3c12SConrad Meyer #if 1
147*052d3c12SConrad Meyer         return contribution;
148*052d3c12SConrad Meyer #else
149*052d3c12SConrad Meyer         return MAX(0, contribution); /* sometimes better, sometimes not ... */
150*052d3c12SConrad Meyer #endif
151*052d3c12SConrad Meyer     }
152*052d3c12SConrad Meyer }
153*052d3c12SConrad Meyer 
154*052d3c12SConrad Meyer /* ZSTD_literalsContribution() :
155*052d3c12SConrad Meyer  * creates a fake cost for the literals part of a sequence
156*052d3c12SConrad Meyer  * which can be compared to the ending cost of a match
157*052d3c12SConrad Meyer  * should a new match start at this position */
158*052d3c12SConrad Meyer static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
159*052d3c12SConrad Meyer                                      const optState_t* const optPtr)
160*052d3c12SConrad Meyer {
161*052d3c12SConrad Meyer     int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
162*052d3c12SConrad Meyer                            + ZSTD_litLengthContribution(litLength, optPtr);
163*052d3c12SConrad Meyer     return contribution;
164*052d3c12SConrad Meyer }
165*052d3c12SConrad Meyer 
166*052d3c12SConrad Meyer /* ZSTD_getMatchPrice() :
167*052d3c12SConrad Meyer  * Provides the cost of the match part (offset + matchLength) of a sequence
168*052d3c12SConrad Meyer  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
169*052d3c12SConrad Meyer  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
170*052d3c12SConrad Meyer FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
171*052d3c12SConrad Meyer                                     U32 const offset, U32 const matchLength,
172*052d3c12SConrad Meyer                                     const optState_t* const optPtr,
173*052d3c12SConrad Meyer                                     int const optLevel)
174*052d3c12SConrad Meyer {
175*052d3c12SConrad Meyer     U32 price;
176*052d3c12SConrad Meyer     U32 const offCode = ZSTD_highbit32(offset+1);
177*052d3c12SConrad Meyer     U32 const mlBase = matchLength - MINMATCH;
178*052d3c12SConrad Meyer     assert(matchLength >= MINMATCH);
179*052d3c12SConrad Meyer 
180*052d3c12SConrad Meyer     if (optPtr->staticPrices)  /* fixed scheme, do not use statistics */
181*052d3c12SConrad Meyer         return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
1820c16b537SWarner Losh 
1830c16b537SWarner Losh     price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
184*052d3c12SConrad Meyer     if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
1850c16b537SWarner Losh 
1860c16b537SWarner Losh     /* match Length */
187*052d3c12SConrad Meyer     {   U32 const mlCode = ZSTD_MLcode(mlBase);
1880c16b537SWarner Losh         price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
1890c16b537SWarner Losh     }
1900c16b537SWarner Losh 
191*052d3c12SConrad Meyer     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
192*052d3c12SConrad Meyer     return price;
1930c16b537SWarner Losh }
1940c16b537SWarner Losh 
195*052d3c12SConrad Meyer static void ZSTD_updateStats(optState_t* const optPtr,
196*052d3c12SConrad Meyer                              U32 litLength, const BYTE* literals,
197*052d3c12SConrad Meyer                              U32 offsetCode, U32 matchLength)
1980c16b537SWarner Losh {
1990c16b537SWarner Losh     /* literals */
200*052d3c12SConrad Meyer     {   U32 u;
2010c16b537SWarner Losh         for (u=0; u < litLength; u++)
2020c16b537SWarner Losh             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
203*052d3c12SConrad Meyer         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
204*052d3c12SConrad Meyer     }
2050c16b537SWarner Losh 
2060c16b537SWarner Losh     /* literal Length */
207*052d3c12SConrad Meyer     {   U32 const llCode = ZSTD_LLcode(litLength);
2080c16b537SWarner Losh         optPtr->litLengthFreq[llCode]++;
2090c16b537SWarner Losh         optPtr->litLengthSum++;
2100c16b537SWarner Losh     }
2110c16b537SWarner Losh 
212*052d3c12SConrad Meyer     /* match offset code (0-2=>repCode; 3+=>offset+2) */
213*052d3c12SConrad Meyer     {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
214*052d3c12SConrad Meyer         assert(offCode <= MaxOff);
2150c16b537SWarner Losh         optPtr->offCodeFreq[offCode]++;
216*052d3c12SConrad Meyer         optPtr->offCodeSum++;
2170c16b537SWarner Losh     }
2180c16b537SWarner Losh 
2190c16b537SWarner Losh     /* match Length */
220*052d3c12SConrad Meyer     {   U32 const mlBase = matchLength - MINMATCH;
221*052d3c12SConrad Meyer         U32 const mlCode = ZSTD_MLcode(mlBase);
2220c16b537SWarner Losh         optPtr->matchLengthFreq[mlCode]++;
2230c16b537SWarner Losh         optPtr->matchLengthSum++;
2240c16b537SWarner Losh     }
2250c16b537SWarner Losh }
2260c16b537SWarner Losh 
2270c16b537SWarner Losh 
228*052d3c12SConrad Meyer /* ZSTD_readMINMATCH() :
229*052d3c12SConrad Meyer  * function safe only for comparisons
230*052d3c12SConrad Meyer  * assumption : memPtr must be at least 4 bytes before end of buffer */
231*052d3c12SConrad Meyer MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
2320c16b537SWarner Losh {
2330c16b537SWarner Losh     switch (length)
2340c16b537SWarner Losh     {
2350c16b537SWarner Losh     default :
2360c16b537SWarner Losh     case 4 : return MEM_read32(memPtr);
2370c16b537SWarner Losh     case 3 : if (MEM_isLittleEndian())
2380c16b537SWarner Losh                 return MEM_read32(memPtr)<<8;
2390c16b537SWarner Losh              else
2400c16b537SWarner Losh                 return MEM_read32(memPtr)>>8;
2410c16b537SWarner Losh     }
2420c16b537SWarner Losh }
2430c16b537SWarner Losh 
2440c16b537SWarner Losh 
2450c16b537SWarner Losh /* Update hashTable3 up to ip (excluded)
2460c16b537SWarner Losh    Assumption : always within prefix (i.e. not within extDict) */
247*052d3c12SConrad Meyer static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
2480c16b537SWarner Losh {
249*052d3c12SConrad Meyer     U32* const hashTable3  = cctx->hashTable3;
250*052d3c12SConrad Meyer     U32 const hashLog3  = cctx->hashLog3;
251*052d3c12SConrad Meyer     const BYTE* const base = cctx->base;
252*052d3c12SConrad Meyer     U32 idx = cctx->nextToUpdate3;
253*052d3c12SConrad Meyer     U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
254*052d3c12SConrad Meyer     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
2550c16b537SWarner Losh 
2560c16b537SWarner Losh     while(idx < target) {
2570c16b537SWarner Losh         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
2580c16b537SWarner Losh         idx++;
2590c16b537SWarner Losh     }
2600c16b537SWarner Losh 
2610c16b537SWarner Losh     return hashTable3[hash3];
2620c16b537SWarner Losh }
2630c16b537SWarner Losh 
2640c16b537SWarner Losh 
2650c16b537SWarner Losh /*-*************************************
2660c16b537SWarner Losh *  Binary Tree search
2670c16b537SWarner Losh ***************************************/
268*052d3c12SConrad Meyer FORCE_INLINE_TEMPLATE
269*052d3c12SConrad Meyer U32 ZSTD_insertBtAndGetAllMatches (
2700c16b537SWarner Losh                     ZSTD_CCtx* zc,
271*052d3c12SConrad Meyer                     const BYTE* const ip, const BYTE* const iLimit, int const extDict,
272*052d3c12SConrad Meyer                     U32 nbCompares, U32 const mls, U32 const sufficient_len,
273*052d3c12SConrad Meyer                     U32 rep[ZSTD_REP_NUM], U32 const ll0,
274*052d3c12SConrad Meyer                     ZSTD_match_t* matches, const U32 lengthToBeat)
2750c16b537SWarner Losh {
2760c16b537SWarner Losh     const BYTE* const base = zc->base;
277*052d3c12SConrad Meyer     U32 const current = (U32)(ip-base);
278*052d3c12SConrad Meyer     U32 const hashLog = zc->appliedParams.cParams.hashLog;
279*052d3c12SConrad Meyer     U32 const minMatch = (mls==3) ? 3 : 4;
2800c16b537SWarner Losh     U32* const hashTable = zc->hashTable;
281*052d3c12SConrad Meyer     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
2820c16b537SWarner Losh     U32 matchIndex  = hashTable[h];
2830c16b537SWarner Losh     U32* const bt   = zc->chainTable;
284*052d3c12SConrad Meyer     U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
285*052d3c12SConrad Meyer     U32 const btMask= (1U << btLog) - 1;
2860c16b537SWarner Losh     size_t commonLengthSmaller=0, commonLengthLarger=0;
2870c16b537SWarner Losh     const BYTE* const dictBase = zc->dictBase;
288*052d3c12SConrad Meyer     U32 const dictLimit = zc->dictLimit;
2890c16b537SWarner Losh     const BYTE* const dictEnd = dictBase + dictLimit;
2900c16b537SWarner Losh     const BYTE* const prefixStart = base + dictLimit;
291*052d3c12SConrad Meyer     U32 const btLow = btMask >= current ? 0 : current - btMask;
292*052d3c12SConrad Meyer     U32 const windowLow = zc->lowLimit;
2930c16b537SWarner Losh     U32* smallerPtr = bt + 2*(current&btMask);
2940c16b537SWarner Losh     U32* largerPtr  = bt + 2*(current&btMask) + 1;
295*052d3c12SConrad Meyer     U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
2960c16b537SWarner Losh     U32 dummy32;   /* to be nullified at the end */
2970c16b537SWarner Losh     U32 mnum = 0;
2980c16b537SWarner Losh 
299*052d3c12SConrad Meyer     size_t bestLength = lengthToBeat-1;
300*052d3c12SConrad Meyer     DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
3010c16b537SWarner Losh 
302*052d3c12SConrad Meyer     /* check repCode */
303*052d3c12SConrad Meyer     {   U32 const lastR = ZSTD_REP_NUM + ll0;
304*052d3c12SConrad Meyer         U32 repCode;
305*052d3c12SConrad Meyer         for (repCode = ll0; repCode < lastR; repCode++) {
306*052d3c12SConrad Meyer             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
307*052d3c12SConrad Meyer             U32 const repIndex = current - repOffset;
308*052d3c12SConrad Meyer             U32 repLen = 0;
309*052d3c12SConrad Meyer             assert(current >= dictLimit);
310*052d3c12SConrad Meyer             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
311*052d3c12SConrad Meyer                 if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
312*052d3c12SConrad Meyer                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
313*052d3c12SConrad Meyer                 }
314*052d3c12SConrad Meyer             } else {  /* repIndex < dictLimit || repIndex >= current */
315*052d3c12SConrad Meyer                 const BYTE* const repMatch = dictBase + repIndex;
316*052d3c12SConrad Meyer                 assert(current >= windowLow);
317*052d3c12SConrad Meyer                 if ( extDict /* this case only valid in extDict mode */
318*052d3c12SConrad Meyer                   && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
319*052d3c12SConrad Meyer                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
320*052d3c12SConrad Meyer                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
321*052d3c12SConrad Meyer                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
322*052d3c12SConrad Meyer             }   }
323*052d3c12SConrad Meyer             /* save longer solution */
324*052d3c12SConrad Meyer             if (repLen > bestLength) {
325*052d3c12SConrad Meyer                 DEBUGLOG(8, "found rep-match %u of length %u",
326*052d3c12SConrad Meyer                             repCode - ll0, (U32)repLen);
327*052d3c12SConrad Meyer                 bestLength = repLen;
328*052d3c12SConrad Meyer                 matches[mnum].off = repCode - ll0;
329*052d3c12SConrad Meyer                 matches[mnum].len = (U32)repLen;
330*052d3c12SConrad Meyer                 mnum++;
331*052d3c12SConrad Meyer                 if ( (repLen > sufficient_len)
332*052d3c12SConrad Meyer                    | (ip+repLen == iLimit) ) {  /* best possible */
333*052d3c12SConrad Meyer                     return mnum;
334*052d3c12SConrad Meyer     }   }   }   }
335*052d3c12SConrad Meyer 
336*052d3c12SConrad Meyer     /* HC3 match finder */
337*052d3c12SConrad Meyer     if ((mls == 3) /*static*/ && (bestLength < mls)) {
3380c16b537SWarner Losh         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
339*052d3c12SConrad Meyer         if ((matchIndex3 > windowLow)
340*052d3c12SConrad Meyer           & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
341*052d3c12SConrad Meyer             size_t mlen;
342*052d3c12SConrad Meyer             if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
343*052d3c12SConrad Meyer                 const BYTE* const match = base + matchIndex3;
344*052d3c12SConrad Meyer                 mlen = ZSTD_count(ip, match, iLimit);
3450c16b537SWarner Losh             } else {
346*052d3c12SConrad Meyer                 const BYTE* const match = dictBase + matchIndex3;
347*052d3c12SConrad Meyer                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
3480c16b537SWarner Losh             }
3490c16b537SWarner Losh 
3500c16b537SWarner Losh             /* save best solution */
351*052d3c12SConrad Meyer             if (mlen >= mls /* == 3 > bestLength */) {
352*052d3c12SConrad Meyer                 DEBUGLOG(8, "found small match with hlog3, of length %u",
353*052d3c12SConrad Meyer                             (U32)mlen);
354*052d3c12SConrad Meyer                 bestLength = mlen;
355*052d3c12SConrad Meyer                 assert(current > matchIndex3);
356*052d3c12SConrad Meyer                 assert(mnum==0);  /* no prior solution */
357*052d3c12SConrad Meyer                 matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
358*052d3c12SConrad Meyer                 matches[0].len = (U32)mlen;
359*052d3c12SConrad Meyer                 mnum = 1;
360*052d3c12SConrad Meyer                 if ( (mlen > sufficient_len) |
361*052d3c12SConrad Meyer                      (ip+mlen == iLimit) ) {  /* best possible length */
362*052d3c12SConrad Meyer                     zc->nextToUpdate = current+1;  /* skip insertion */
363*052d3c12SConrad Meyer                     return 1;
364*052d3c12SConrad Meyer     }   }   }   }
3650c16b537SWarner Losh 
3660c16b537SWarner Losh     hashTable[h] = current;   /* Update Hash Table */
3670c16b537SWarner Losh 
3680c16b537SWarner Losh     while (nbCompares-- && (matchIndex > windowLow)) {
369*052d3c12SConrad Meyer         U32* const nextPtr = bt + 2*(matchIndex & btMask);
3700c16b537SWarner Losh         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
3710c16b537SWarner Losh         const BYTE* match;
372*052d3c12SConrad Meyer         assert(current > matchIndex);
3730c16b537SWarner Losh 
3740c16b537SWarner Losh         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
375*052d3c12SConrad Meyer             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
3760c16b537SWarner Losh             match = base + matchIndex;
377*052d3c12SConrad Meyer             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
3780c16b537SWarner Losh         } else {
3790c16b537SWarner Losh             match = dictBase + matchIndex;
3800c16b537SWarner Losh             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
3810c16b537SWarner Losh             if (matchIndex+matchLength >= dictLimit)
382*052d3c12SConrad Meyer                 match = base + matchIndex;   /* prepare for match[matchLength] */
3830c16b537SWarner Losh         }
3840c16b537SWarner Losh 
3850c16b537SWarner Losh         if (matchLength > bestLength) {
386*052d3c12SConrad Meyer             DEBUGLOG(8, "found match of length %u at distance %u",
387*052d3c12SConrad Meyer                     (U32)matchLength, current - matchIndex);
388*052d3c12SConrad Meyer             assert(matchEndIdx > matchIndex);
389*052d3c12SConrad Meyer             if (matchLength > matchEndIdx - matchIndex)
390*052d3c12SConrad Meyer                 matchEndIdx = matchIndex + (U32)matchLength;
3910c16b537SWarner Losh             bestLength = matchLength;
392*052d3c12SConrad Meyer             matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
3930c16b537SWarner Losh             matches[mnum].len = (U32)matchLength;
3940c16b537SWarner Losh             mnum++;
3950c16b537SWarner Losh             if (matchLength > ZSTD_OPT_NUM) break;
396*052d3c12SConrad Meyer             if (ip+matchLength == iLimit) {  /* equal : no way to know if inf or sup */
397*052d3c12SConrad Meyer                 break;   /* drop, to preserve bt consistency (miss a little bit of compression) */
398*052d3c12SConrad Meyer             }
3990c16b537SWarner Losh         }
4000c16b537SWarner Losh 
4010c16b537SWarner Losh         if (match[matchLength] < ip[matchLength]) {
402*052d3c12SConrad Meyer             /* match smaller than current */
4030c16b537SWarner Losh             *smallerPtr = matchIndex;             /* update smaller idx */
4040c16b537SWarner Losh             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
4050c16b537SWarner Losh             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
406*052d3c12SConrad Meyer             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
407*052d3c12SConrad Meyer             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
4080c16b537SWarner Losh         } else {
4090c16b537SWarner Losh             *largerPtr = matchIndex;
4100c16b537SWarner Losh             commonLengthLarger = matchLength;
4110c16b537SWarner Losh             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
4120c16b537SWarner Losh             largerPtr = nextPtr;
4130c16b537SWarner Losh             matchIndex = nextPtr[0];
4140c16b537SWarner Losh     }   }
4150c16b537SWarner Losh 
4160c16b537SWarner Losh     *smallerPtr = *largerPtr = 0;
4170c16b537SWarner Losh 
418*052d3c12SConrad Meyer     assert(matchEndIdx > current+8);
419*052d3c12SConrad Meyer     zc->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
4200c16b537SWarner Losh     return mnum;
4210c16b537SWarner Losh }
4220c16b537SWarner Losh 
4230c16b537SWarner Losh 
424*052d3c12SConrad Meyer FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
4250c16b537SWarner Losh                         ZSTD_CCtx* zc,   /* Index table will be updated */
426*052d3c12SConrad Meyer                         const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
427*052d3c12SConrad Meyer                         U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
428*052d3c12SConrad Meyer                         U32 rep[ZSTD_REP_NUM], U32 const ll0,
429*052d3c12SConrad Meyer                         ZSTD_match_t* matches, U32 const lengthToBeat)
4300c16b537SWarner Losh {
431*052d3c12SConrad Meyer     DEBUGLOG(7, "ZSTD_BtGetAllMatches");
432*052d3c12SConrad Meyer     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
433*052d3c12SConrad Meyer     if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
434*052d3c12SConrad Meyer     else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
4350c16b537SWarner Losh     switch(matchLengthSearch)
4360c16b537SWarner Losh     {
437*052d3c12SConrad Meyer     case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
4380c16b537SWarner Losh     default :
439*052d3c12SConrad Meyer     case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
440*052d3c12SConrad Meyer     case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
4410c16b537SWarner Losh     case 7 :
442*052d3c12SConrad Meyer     case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
4430c16b537SWarner Losh     }
4440c16b537SWarner Losh }
4450c16b537SWarner Losh 
4460c16b537SWarner Losh 
4470c16b537SWarner Losh /*-*******************************
4480c16b537SWarner Losh *  Optimal parser
4490c16b537SWarner Losh *********************************/
450*052d3c12SConrad Meyer typedef struct repcodes_s {
451*052d3c12SConrad Meyer     U32 rep[3];
452*052d3c12SConrad Meyer } repcodes_t;
453*052d3c12SConrad Meyer 
454*052d3c12SConrad Meyer repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
455*052d3c12SConrad Meyer {
456*052d3c12SConrad Meyer     repcodes_t newReps;
457*052d3c12SConrad Meyer     if (offset >= ZSTD_REP_NUM) {  /* full offset */
458*052d3c12SConrad Meyer         newReps.rep[2] = rep[1];
459*052d3c12SConrad Meyer         newReps.rep[1] = rep[0];
460*052d3c12SConrad Meyer         newReps.rep[0] = offset - ZSTD_REP_MOVE;
461*052d3c12SConrad Meyer     } else {   /* repcode */
462*052d3c12SConrad Meyer         U32 const repCode = offset + ll0;
463*052d3c12SConrad Meyer         if (repCode > 0) {  /* note : if repCode==0, no change */
464*052d3c12SConrad Meyer             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
465*052d3c12SConrad Meyer             newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
466*052d3c12SConrad Meyer             newReps.rep[1] = rep[0];
467*052d3c12SConrad Meyer             newReps.rep[0] = currentOffset;
468*052d3c12SConrad Meyer         } else {   /* repCode == 0 */
469*052d3c12SConrad Meyer             memcpy(&newReps, rep, sizeof(newReps));
470*052d3c12SConrad Meyer         }
471*052d3c12SConrad Meyer     }
472*052d3c12SConrad Meyer     return newReps;
473*052d3c12SConrad Meyer }
474*052d3c12SConrad Meyer 
475*052d3c12SConrad Meyer 
476*052d3c12SConrad Meyer typedef struct {
477*052d3c12SConrad Meyer     const BYTE* anchor;
478*052d3c12SConrad Meyer     U32 litlen;
479*052d3c12SConrad Meyer     U32 rawLitCost;
480*052d3c12SConrad Meyer } cachedLiteralPrice_t;
481*052d3c12SConrad Meyer 
482*052d3c12SConrad Meyer static U32 ZSTD_rawLiteralsCost_cached(
483*052d3c12SConrad Meyer                             cachedLiteralPrice_t* const cachedLitPrice,
484*052d3c12SConrad Meyer                             const BYTE* const anchor, U32 const litlen,
485*052d3c12SConrad Meyer                             const optState_t* const optStatePtr)
486*052d3c12SConrad Meyer {
487*052d3c12SConrad Meyer     U32 startCost;
488*052d3c12SConrad Meyer     U32 remainingLength;
489*052d3c12SConrad Meyer     const BYTE* startPosition;
490*052d3c12SConrad Meyer 
491*052d3c12SConrad Meyer     if (anchor == cachedLitPrice->anchor) {
492*052d3c12SConrad Meyer         startCost = cachedLitPrice->rawLitCost;
493*052d3c12SConrad Meyer         startPosition = anchor + cachedLitPrice->litlen;
494*052d3c12SConrad Meyer         assert(litlen >= cachedLitPrice->litlen);
495*052d3c12SConrad Meyer         remainingLength = litlen - cachedLitPrice->litlen;
496*052d3c12SConrad Meyer     } else {
497*052d3c12SConrad Meyer         startCost = 0;
498*052d3c12SConrad Meyer         startPosition = anchor;
499*052d3c12SConrad Meyer         remainingLength = litlen;
500*052d3c12SConrad Meyer     }
501*052d3c12SConrad Meyer 
502*052d3c12SConrad Meyer     {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
503*052d3c12SConrad Meyer         cachedLitPrice->anchor = anchor;
504*052d3c12SConrad Meyer         cachedLitPrice->litlen = litlen;
505*052d3c12SConrad Meyer         cachedLitPrice->rawLitCost = rawLitCost;
506*052d3c12SConrad Meyer         return rawLitCost;
507*052d3c12SConrad Meyer     }
508*052d3c12SConrad Meyer }
509*052d3c12SConrad Meyer 
510*052d3c12SConrad Meyer static U32 ZSTD_fullLiteralsCost_cached(
511*052d3c12SConrad Meyer                             cachedLiteralPrice_t* const cachedLitPrice,
512*052d3c12SConrad Meyer                             const BYTE* const anchor, U32 const litlen,
513*052d3c12SConrad Meyer                             const optState_t* const optStatePtr)
514*052d3c12SConrad Meyer {
515*052d3c12SConrad Meyer     return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
516*052d3c12SConrad Meyer          + ZSTD_litLengthPrice(litlen, optStatePtr);
517*052d3c12SConrad Meyer }
518*052d3c12SConrad Meyer 
519*052d3c12SConrad Meyer static int ZSTD_literalsContribution_cached(
520*052d3c12SConrad Meyer                             cachedLiteralPrice_t* const cachedLitPrice,
521*052d3c12SConrad Meyer                             const BYTE* const anchor, U32 const litlen,
522*052d3c12SConrad Meyer                             const optState_t* const optStatePtr)
523*052d3c12SConrad Meyer {
524*052d3c12SConrad Meyer     int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
525*052d3c12SConrad Meyer                            + ZSTD_litLengthContribution(litlen, optStatePtr);
526*052d3c12SConrad Meyer     return contribution;
527*052d3c12SConrad Meyer }
528*052d3c12SConrad Meyer 
5290c16b537SWarner Losh FORCE_INLINE_TEMPLATE
5300c16b537SWarner Losh size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
531*052d3c12SConrad Meyer                                       const void* src, size_t srcSize,
532*052d3c12SConrad Meyer                                       const int optLevel, const int extDict)
5330c16b537SWarner Losh {
534*052d3c12SConrad Meyer     seqStore_t* const seqStorePtr = &(ctx->seqStore);
535*052d3c12SConrad Meyer     optState_t* const optStatePtr = &(ctx->optState);
5360c16b537SWarner Losh     const BYTE* const istart = (const BYTE*)src;
5370c16b537SWarner Losh     const BYTE* ip = istart;
5380c16b537SWarner Losh     const BYTE* anchor = istart;
5390c16b537SWarner Losh     const BYTE* const iend = istart + srcSize;
5400c16b537SWarner Losh     const BYTE* const ilimit = iend - 8;
5410c16b537SWarner Losh     const BYTE* const base = ctx->base;
5420c16b537SWarner Losh     const BYTE* const prefixStart = base + ctx->dictLimit;
5430c16b537SWarner Losh 
544*052d3c12SConrad Meyer     U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
545*052d3c12SConrad Meyer     U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
546*052d3c12SConrad Meyer     U32 const mls = ctx->appliedParams.cParams.searchLength;
547*052d3c12SConrad Meyer     U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
5480c16b537SWarner Losh 
549*052d3c12SConrad Meyer     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
550*052d3c12SConrad Meyer     ZSTD_match_t* const matches = optStatePtr->matchTable;
551*052d3c12SConrad Meyer     cachedLiteralPrice_t cachedLitPrice;
552*052d3c12SConrad Meyer     U32 rep[ZSTD_REP_NUM];
5530c16b537SWarner Losh 
5540c16b537SWarner Losh     /* init */
555*052d3c12SConrad Meyer     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
5560c16b537SWarner Losh     ctx->nextToUpdate3 = ctx->nextToUpdate;
5570c16b537SWarner Losh     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
5580c16b537SWarner Losh     ip += (ip==prefixStart);
559*052d3c12SConrad Meyer     { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
560*052d3c12SConrad Meyer     memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
5610c16b537SWarner Losh 
5620c16b537SWarner Losh     /* Match Loop */
5630c16b537SWarner Losh     while (ip < ilimit) {
564*052d3c12SConrad Meyer         U32 cur, last_pos = 0;
565*052d3c12SConrad Meyer         U32 best_mlen, best_off;
5660c16b537SWarner Losh 
567*052d3c12SConrad Meyer         /* find first match */
568*052d3c12SConrad Meyer         {   U32 const litlen = (U32)(ip - anchor);
569*052d3c12SConrad Meyer             U32 const ll0 = !litlen;
570*052d3c12SConrad Meyer             U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
571*052d3c12SConrad Meyer             if (!nbMatches) { ip++; continue; }
5720c16b537SWarner Losh 
5730c16b537SWarner Losh             /* initialize opt[0] */
5740c16b537SWarner Losh             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
5750c16b537SWarner Losh             opt[0].mlen = 1;
5760c16b537SWarner Losh             opt[0].litlen = litlen;
5770c16b537SWarner Losh 
578*052d3c12SConrad Meyer             /* large match -> immediate encoding */
579*052d3c12SConrad Meyer             {   U32 const maxML = matches[nbMatches-1].len;
580*052d3c12SConrad Meyer                 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
581*052d3c12SConrad Meyer                             nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
5820c16b537SWarner Losh 
583*052d3c12SConrad Meyer                 if (maxML > sufficient_len) {
584*052d3c12SConrad Meyer                     best_mlen = maxML;
585*052d3c12SConrad Meyer                     best_off = matches[nbMatches-1].off;
586*052d3c12SConrad Meyer                     DEBUGLOG(7, "large match (%u>%u), immediate encoding",
587*052d3c12SConrad Meyer                                 best_mlen, sufficient_len);
588*052d3c12SConrad Meyer                     cur = 0;
589*052d3c12SConrad Meyer                     last_pos = 1;
590*052d3c12SConrad Meyer                     goto _shortestPath;
591*052d3c12SConrad Meyer             }   }
592*052d3c12SConrad Meyer 
593*052d3c12SConrad Meyer             /* set prices for first matches starting position == 0 */
594*052d3c12SConrad Meyer             {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
595*052d3c12SConrad Meyer                 U32 pos;
596*052d3c12SConrad Meyer                 U32 matchNb;
597*052d3c12SConrad Meyer                 for (pos = 0; pos < minMatch; pos++) {
598*052d3c12SConrad Meyer                     opt[pos].mlen = 1;
599*052d3c12SConrad Meyer                     opt[pos].price = ZSTD_MAX_PRICE;
600*052d3c12SConrad Meyer                 }
601*052d3c12SConrad Meyer                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
602*052d3c12SConrad Meyer                     U32 const offset = matches[matchNb].off;
603*052d3c12SConrad Meyer                     U32 const end = matches[matchNb].len;
604*052d3c12SConrad Meyer                     repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
605*052d3c12SConrad Meyer                     for ( ; pos <= end ; pos++ ) {
606*052d3c12SConrad Meyer                         U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
607*052d3c12SConrad Meyer                         DEBUGLOG(7, "rPos:%u => set initial price : %u",
608*052d3c12SConrad Meyer                                     pos, matchPrice);
609*052d3c12SConrad Meyer                         opt[pos].mlen = pos;
610*052d3c12SConrad Meyer                         opt[pos].off = offset;
611*052d3c12SConrad Meyer                         opt[pos].litlen = litlen;
612*052d3c12SConrad Meyer                         opt[pos].price = matchPrice;
613*052d3c12SConrad Meyer                         memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
614*052d3c12SConrad Meyer                 }   }
615*052d3c12SConrad Meyer                 last_pos = pos-1;
616*052d3c12SConrad Meyer             }
6170c16b537SWarner Losh         }
6180c16b537SWarner Losh 
619*052d3c12SConrad Meyer         /* check further positions */
620*052d3c12SConrad Meyer         for (cur = 1; cur <= last_pos; cur++) {
621*052d3c12SConrad Meyer             const BYTE* const inr = ip + cur;
622*052d3c12SConrad Meyer             assert(cur < ZSTD_OPT_NUM);
623*052d3c12SConrad Meyer 
624*052d3c12SConrad Meyer             /* Fix current position with one literal if cheaper */
625*052d3c12SConrad Meyer             {   U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
626*052d3c12SConrad Meyer                 int price;  /* note : contribution can be negative */
627*052d3c12SConrad Meyer                 if (cur > litlen) {
628*052d3c12SConrad Meyer                     price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
629*052d3c12SConrad Meyer                 } else {
630*052d3c12SConrad Meyer                     price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
631*052d3c12SConrad Meyer                 }
632*052d3c12SConrad Meyer                 assert(price < 1000000000); /* overflow check */
633*052d3c12SConrad Meyer                 if (price <= opt[cur].price) {
634*052d3c12SConrad Meyer                     DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
635*052d3c12SConrad Meyer                                 cur, price, opt[cur].price);
636*052d3c12SConrad Meyer                     opt[cur].mlen = 1;
637*052d3c12SConrad Meyer                     opt[cur].off = 0;
638*052d3c12SConrad Meyer                     opt[cur].litlen = litlen;
639*052d3c12SConrad Meyer                     opt[cur].price = price;
640*052d3c12SConrad Meyer                     memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
641*052d3c12SConrad Meyer             }   }
642*052d3c12SConrad Meyer 
643*052d3c12SConrad Meyer             /* last match must start at a minimum distance of 8 from oend */
644*052d3c12SConrad Meyer             if (inr > ilimit) continue;
6450c16b537SWarner Losh 
6460c16b537SWarner Losh             if (cur == last_pos) break;
6470c16b537SWarner Losh 
648*052d3c12SConrad Meyer              if ( (optLevel==0) /*static*/
649*052d3c12SConrad Meyer                && (opt[cur+1].price <= opt[cur].price) )
650*052d3c12SConrad Meyer                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
6510c16b537SWarner Losh 
652*052d3c12SConrad Meyer             {   U32 const ll0 = (opt[cur].mlen != 1);
653*052d3c12SConrad Meyer                 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
654*052d3c12SConrad Meyer                 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
655*052d3c12SConrad Meyer                 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
656*052d3c12SConrad Meyer                 U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
657*052d3c12SConrad Meyer                 U32 matchNb;
658*052d3c12SConrad Meyer                 if (!nbMatches) continue;
6590c16b537SWarner Losh 
660*052d3c12SConrad Meyer                 {   U32 const maxML = matches[nbMatches-1].len;
661*052d3c12SConrad Meyer                     DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
662*052d3c12SConrad Meyer                                 cur, nbMatches, maxML);
6630c16b537SWarner Losh 
664*052d3c12SConrad Meyer                     if ( (maxML > sufficient_len)
665*052d3c12SConrad Meyer                        | (cur + maxML >= ZSTD_OPT_NUM) ) {
666*052d3c12SConrad Meyer                         best_mlen = maxML;
667*052d3c12SConrad Meyer                         best_off = matches[nbMatches-1].off;
6680c16b537SWarner Losh                         last_pos = cur + 1;
669*052d3c12SConrad Meyer                         goto _shortestPath;
670*052d3c12SConrad Meyer                     }
6710c16b537SWarner Losh                 }
6720c16b537SWarner Losh 
673*052d3c12SConrad Meyer                 /* set prices using matches found at position == cur */
674*052d3c12SConrad Meyer                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
675*052d3c12SConrad Meyer                     U32 const offset = matches[matchNb].off;
676*052d3c12SConrad Meyer                     repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
677*052d3c12SConrad Meyer                     U32 const lastML = matches[matchNb].len;
678*052d3c12SConrad Meyer                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
679*052d3c12SConrad Meyer                     U32 mlen;
6800c16b537SWarner Losh 
681*052d3c12SConrad Meyer                     DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
682*052d3c12SConrad Meyer                                 matchNb, matches[matchNb].off, lastML, litlen);
683*052d3c12SConrad Meyer 
684*052d3c12SConrad Meyer                     for (mlen = lastML; mlen >= startML; mlen--) {
685*052d3c12SConrad Meyer                         U32 const pos = cur + mlen;
686*052d3c12SConrad Meyer                         int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
687*052d3c12SConrad Meyer 
688*052d3c12SConrad Meyer                         if ((pos > last_pos) || (price < opt[pos].price)) {
689*052d3c12SConrad Meyer                             DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
690*052d3c12SConrad Meyer                                         pos, price, opt[pos].price);
691*052d3c12SConrad Meyer                             while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
692*052d3c12SConrad Meyer                             opt[pos].mlen = mlen;
693*052d3c12SConrad Meyer                             opt[pos].off = offset;
694*052d3c12SConrad Meyer                             opt[pos].litlen = litlen;
695*052d3c12SConrad Meyer                             opt[pos].price = price;
696*052d3c12SConrad Meyer                             memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
6970c16b537SWarner Losh                         } else {
698*052d3c12SConrad Meyer                             if (optLevel==0) break;  /* gets ~+10% speed for about -0.01 ratio loss */
6990c16b537SWarner Losh                         }
7000c16b537SWarner Losh             }   }   }
701*052d3c12SConrad Meyer         }  /* for (cur = 1; cur <= last_pos; cur++) */
7020c16b537SWarner Losh 
7030c16b537SWarner Losh         best_mlen = opt[last_pos].mlen;
7040c16b537SWarner Losh         best_off = opt[last_pos].off;
7050c16b537SWarner Losh         cur = last_pos - best_mlen;
7060c16b537SWarner Losh 
707*052d3c12SConrad Meyer _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
708*052d3c12SConrad Meyer         assert(opt[0].mlen == 1);
7090c16b537SWarner Losh 
710*052d3c12SConrad Meyer         /* reverse traversal */
711*052d3c12SConrad Meyer         DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
712*052d3c12SConrad Meyer                     last_pos, cur);
713*052d3c12SConrad Meyer         {   U32 selectedMatchLength = best_mlen;
714*052d3c12SConrad Meyer             U32 selectedOffset = best_off;
715*052d3c12SConrad Meyer             U32 pos = cur;
7160c16b537SWarner Losh             while (1) {
717*052d3c12SConrad Meyer                 U32 const mlen = opt[pos].mlen;
718*052d3c12SConrad Meyer                 U32 const off = opt[pos].off;
719*052d3c12SConrad Meyer                 opt[pos].mlen = selectedMatchLength;
720*052d3c12SConrad Meyer                 opt[pos].off = selectedOffset;
721*052d3c12SConrad Meyer                 selectedMatchLength = mlen;
722*052d3c12SConrad Meyer                 selectedOffset = off;
723*052d3c12SConrad Meyer                 if (mlen > pos) break;
724*052d3c12SConrad Meyer                 pos -= mlen;
725*052d3c12SConrad Meyer         }   }
7260c16b537SWarner Losh 
727*052d3c12SConrad Meyer         /* save sequences */
728*052d3c12SConrad Meyer         {   U32 pos;
729*052d3c12SConrad Meyer             for (pos=0; pos < last_pos; ) {
730*052d3c12SConrad Meyer                 U32 const llen = (U32)(ip - anchor);
731*052d3c12SConrad Meyer                 U32 const mlen = opt[pos].mlen;
732*052d3c12SConrad Meyer                 U32 const offset = opt[pos].off;
733*052d3c12SConrad Meyer                 if (mlen == 1) { ip++; pos++; continue; }  /* literal position => move on */
734*052d3c12SConrad Meyer                 pos += mlen; ip += mlen;
7350c16b537SWarner Losh 
736*052d3c12SConrad Meyer                 /* repcodes update : like ZSTD_updateRep(), but update in place */
737*052d3c12SConrad Meyer                 if (offset >= ZSTD_REP_NUM) {  /* full offset */
7380c16b537SWarner Losh                     rep[2] = rep[1];
7390c16b537SWarner Losh                     rep[1] = rep[0];
740*052d3c12SConrad Meyer                     rep[0] = offset - ZSTD_REP_MOVE;
741*052d3c12SConrad Meyer                 } else {   /* repcode */
742*052d3c12SConrad Meyer                     U32 const repCode = offset + (llen==0);
743*052d3c12SConrad Meyer                     if (repCode) {  /* note : if repCode==0, no change */
744*052d3c12SConrad Meyer                         U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
745*052d3c12SConrad Meyer                         if (repCode >= 2) rep[2] = rep[1];
7460c16b537SWarner Losh                         rep[1] = rep[0];
747*052d3c12SConrad Meyer                         rep[0] = currentOffset;
7480c16b537SWarner Losh                     }
7490c16b537SWarner Losh                 }
7500c16b537SWarner Losh 
751*052d3c12SConrad Meyer                 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
752*052d3c12SConrad Meyer                 ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
753*052d3c12SConrad Meyer                 anchor = ip;
754*052d3c12SConrad Meyer         }   }
755*052d3c12SConrad Meyer         ZSTD_setLog2Prices(optStatePtr);
756*052d3c12SConrad Meyer     }   /* while (ip < ilimit) */
7570c16b537SWarner Losh 
7580c16b537SWarner Losh     /* Save reps for next block */
7590c16b537SWarner Losh     { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
7600c16b537SWarner Losh 
7610c16b537SWarner Losh     /* Return the last literals size */
7620c16b537SWarner Losh     return iend - anchor;
7630c16b537SWarner Losh }
7640c16b537SWarner Losh 
7650c16b537SWarner Losh 
7660c16b537SWarner Losh size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
7670c16b537SWarner Losh {
768*052d3c12SConrad Meyer     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
769*052d3c12SConrad Meyer     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
7700c16b537SWarner Losh }
7710c16b537SWarner Losh 
7720c16b537SWarner Losh size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
7730c16b537SWarner Losh {
774*052d3c12SConrad Meyer     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
7750c16b537SWarner Losh }
7760c16b537SWarner Losh 
7770c16b537SWarner Losh size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
7780c16b537SWarner Losh {
779*052d3c12SConrad Meyer     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
7800c16b537SWarner Losh }
7810c16b537SWarner Losh 
7820c16b537SWarner Losh size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
7830c16b537SWarner Losh {
784*052d3c12SConrad Meyer     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
7850c16b537SWarner Losh }
786