xref: /linux/lib/zstd/compress/zstd_opt.c (revision 7ae9fb1b7ecbb5d85d07857943f677fd1a559b18)
1 /*
2  * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
14 
15 
16 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE     (1<<30)
18 
19 #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20 
21 
22 /*-*************************************
23 *  Price functions for optimal parser
24 ***************************************/
25 
26 #if 0    /* approximation at bit level (for tests) */
27 #  define BITCOST_ACCURACY 0
28 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 #  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0  /* fractional bit accuracy (for tests) */
31 #  define BITCOST_ACCURACY 8
32 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else    /* opt==approx, ultra==accurate */
35 #  define BITCOST_ACCURACY 8
36 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 #  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
39 
ZSTD_bitWeight(U32 stat)40 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41 {
42     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43 }
44 
ZSTD_fracWeight(U32 rawStat)45 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46 {
47     U32 const stat = rawStat + 1;
48     U32 const hb = ZSTD_highbit32(stat);
49     U32 const BWeight = hb * BITCOST_MULTIPLIER;
50     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51     U32 const weight = BWeight + FWeight;
52     assert(hb + BITCOST_ACCURACY < 31);
53     return weight;
54 }
55 
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58  * @return price in bytes as fractional value
59  * for debug messages only */
ZSTD_fCost(U32 price)60 MEM_STATIC double ZSTD_fCost(U32 price)
61 {
62     return (double)price / (BITCOST_MULTIPLIER*8);
63 }
64 #endif
65 
ZSTD_compressedLiterals(optState_t const * const optPtr)66 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
67 {
68     return optPtr->literalCompressionMode != ZSTD_ps_disable;
69 }
70 
ZSTD_setBasePrices(optState_t * optPtr,int optLevel)71 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
72 {
73     if (ZSTD_compressedLiterals(optPtr))
74         optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
78 }
79 
80 
sum_u32(const unsigned table[],size_t nbElts)81 static U32 sum_u32(const unsigned table[], size_t nbElts)
82 {
83     size_t n;
84     U32 total = 0;
85     for (n=0; n<nbElts; n++) {
86         total += table[n];
87     }
88     return total;
89 }
90 
ZSTD_downscaleStats(unsigned * table,U32 lastEltIndex,U32 shift)91 static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
92 {
93     U32 s, sum=0;
94     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95     assert(shift < 30);
96     for (s=0; s<lastEltIndex+1; s++) {
97         table[s] = 1 + (table[s] >> shift);
98         sum += table[s];
99     }
100     return sum;
101 }
102 
103 /* ZSTD_scaleStats() :
104  * reduce all elements in table is sum too large
105  * return the resulting sum of elements */
ZSTD_scaleStats(unsigned * table,U32 lastEltIndex,U32 logTarget)106 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107 {
108     U32 const prevsum = sum_u32(table, lastEltIndex+1);
109     U32 const factor = prevsum >> logTarget;
110     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111     assert(logTarget < 30);
112     if (factor <= 1) return prevsum;
113     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114 }
115 
116 /* ZSTD_rescaleFreqs() :
117  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118  *    take hints from dictionary if there is one
119  *    and init from zero if there is none,
120  *    using src for literals stats, and baseline stats for sequence symbols
121  * otherwise downscale existing stats, to be used as seed for next block.
122  */
123 static void
ZSTD_rescaleFreqs(optState_t * const optPtr,const BYTE * const src,size_t const srcSize,int const optLevel)124 ZSTD_rescaleFreqs(optState_t* const optPtr,
125             const BYTE* const src, size_t const srcSize,
126                   int const optLevel)
127 {
128     int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130     optPtr->priceType = zop_dynamic;
131 
132     if (optPtr->litLengthSum == 0) {  /* first block : init */
133         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
134             DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135             optPtr->priceType = zop_predef;
136         }
137 
138         assert(optPtr->symbolCosts != NULL);
139         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140             /* huffman table presumed generated by dictionary */
141             optPtr->priceType = zop_dynamic;
142 
143             if (compressedLiterals) {
144                 unsigned lit;
145                 assert(optPtr->litFreq != NULL);
146                 optPtr->litSum = 0;
147                 for (lit=0; lit<=MaxLit; lit++) {
148                     U32 const scaleLog = 11;   /* scale to 2K */
149                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150                     assert(bitCost <= scaleLog);
151                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152                     optPtr->litSum += optPtr->litFreq[lit];
153             }   }
154 
155             {   unsigned ll;
156                 FSE_CState_t llstate;
157                 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158                 optPtr->litLengthSum = 0;
159                 for (ll=0; ll<=MaxLL; ll++) {
160                     U32 const scaleLog = 10;   /* scale to 1K */
161                     U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162                     assert(bitCost < scaleLog);
163                     optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164                     optPtr->litLengthSum += optPtr->litLengthFreq[ll];
165             }   }
166 
167             {   unsigned ml;
168                 FSE_CState_t mlstate;
169                 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170                 optPtr->matchLengthSum = 0;
171                 for (ml=0; ml<=MaxML; ml++) {
172                     U32 const scaleLog = 10;
173                     U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174                     assert(bitCost < scaleLog);
175                     optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176                     optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
177             }   }
178 
179             {   unsigned of;
180                 FSE_CState_t ofstate;
181                 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182                 optPtr->offCodeSum = 0;
183                 for (of=0; of<=MaxOff; of++) {
184                     U32 const scaleLog = 10;
185                     U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186                     assert(bitCost < scaleLog);
187                     optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
189             }   }
190 
191         } else {  /* not a dictionary */
192 
193             assert(optPtr->litFreq != NULL);
194             if (compressedLiterals) {
195                 unsigned lit = MaxLit;
196                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
197                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
198             }
199 
200             {   unsigned const baseLLfreqs[MaxLL+1] = {
201                     4, 2, 1, 1, 1, 1, 1, 1,
202                     1, 1, 1, 1, 1, 1, 1, 1,
203                     1, 1, 1, 1, 1, 1, 1, 1,
204                     1, 1, 1, 1, 1, 1, 1, 1,
205                     1, 1, 1, 1
206                 };
207                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
208                 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
209             }
210 
211             {   unsigned ml;
212                 for (ml=0; ml<=MaxML; ml++)
213                     optPtr->matchLengthFreq[ml] = 1;
214             }
215             optPtr->matchLengthSum = MaxML+1;
216 
217             {   unsigned const baseOFCfreqs[MaxOff+1] = {
218                     6, 2, 1, 1, 2, 3, 4, 4,
219                     4, 3, 2, 1, 1, 1, 1, 1,
220                     1, 1, 1, 1, 1, 1, 1, 1,
221                     1, 1, 1, 1, 1, 1, 1, 1
222                 };
223                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
224                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
225             }
226 
227 
228         }
229 
230     } else {   /* new block : re-use previous statistics, scaled down */
231 
232         if (compressedLiterals)
233             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
234         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
235         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
236         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
237     }
238 
239     ZSTD_setBasePrices(optPtr, optLevel);
240 }
241 
242 /* ZSTD_rawLiteralsCost() :
243  * price of literals (only) in specified segment (which length can be 0).
244  * does not include price of literalLength symbol */
ZSTD_rawLiteralsCost(const BYTE * const literals,U32 const litLength,const optState_t * const optPtr,int optLevel)245 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246                                 const optState_t* const optPtr,
247                                 int optLevel)
248 {
249     if (litLength == 0) return 0;
250 
251     if (!ZSTD_compressedLiterals(optPtr))
252         return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
253 
254     if (optPtr->priceType == zop_predef)
255         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
256 
257     /* dynamic statistics */
258     {   U32 price = litLength * optPtr->litSumBasePrice;
259         U32 u;
260         for (u=0; u < litLength; u++) {
261             assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
262             price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
263         }
264         return price;
265     }
266 }
267 
268 /* ZSTD_litLengthPrice() :
269  * cost of literalLength symbol */
ZSTD_litLengthPrice(U32 const litLength,const optState_t * const optPtr,int optLevel)270 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
271 {
272     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273     if (optPtr->priceType == zop_predef)
274         return WEIGHT(litLength, optLevel);
275     /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276      * because it isn't representable in the zstd format. So instead just
277      * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278      * would be all literals.
279      */
280     if (litLength == ZSTD_BLOCKSIZE_MAX)
281         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
282 
283     /* dynamic statistics */
284     {   U32 const llCode = ZSTD_LLcode(litLength);
285         return (LL_bits[llCode] * BITCOST_MULTIPLIER)
286              + optPtr->litLengthSumBasePrice
287              - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
288     }
289 }
290 
291 /* ZSTD_getMatchPrice() :
292  * Provides the cost of the match part (offset + matchLength) of a sequence
293  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
294  * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
295  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
296  */
297 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode,U32 const matchLength,const optState_t * const optPtr,int const optLevel)298 ZSTD_getMatchPrice(U32 const offcode,
299                    U32 const matchLength,
300              const optState_t* const optPtr,
301                    int const optLevel)
302 {
303     U32 price;
304     U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
305     U32 const mlBase = matchLength - MINMATCH;
306     assert(matchLength >= MINMATCH);
307 
308     if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
309         return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
310 
311     /* dynamic statistics */
312     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
313     if ((optLevel<2) /*static*/ && offCode >= 20)
314         price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
315 
316     /* match Length */
317     {   U32 const mlCode = ZSTD_MLcode(mlBase);
318         price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
319     }
320 
321     price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
322 
323     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
324     return price;
325 }
326 
327 /* ZSTD_updateStats() :
328  * assumption : literals + litLengtn <= iend */
ZSTD_updateStats(optState_t * const optPtr,U32 litLength,const BYTE * literals,U32 offsetCode,U32 matchLength)329 static void ZSTD_updateStats(optState_t* const optPtr,
330                              U32 litLength, const BYTE* literals,
331                              U32 offsetCode, U32 matchLength)
332 {
333     /* literals */
334     if (ZSTD_compressedLiterals(optPtr)) {
335         U32 u;
336         for (u=0; u < litLength; u++)
337             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
338         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
339     }
340 
341     /* literal Length */
342     {   U32 const llCode = ZSTD_LLcode(litLength);
343         optPtr->litLengthFreq[llCode]++;
344         optPtr->litLengthSum++;
345     }
346 
347     /* offset code : expected to follow storeSeq() numeric representation */
348     {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
349         assert(offCode <= MaxOff);
350         optPtr->offCodeFreq[offCode]++;
351         optPtr->offCodeSum++;
352     }
353 
354     /* match Length */
355     {   U32 const mlBase = matchLength - MINMATCH;
356         U32 const mlCode = ZSTD_MLcode(mlBase);
357         optPtr->matchLengthFreq[mlCode]++;
358         optPtr->matchLengthSum++;
359     }
360 }
361 
362 
363 /* ZSTD_readMINMATCH() :
364  * function safe only for comparisons
365  * assumption : memPtr must be at least 4 bytes before end of buffer */
ZSTD_readMINMATCH(const void * memPtr,U32 length)366 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
367 {
368     switch (length)
369     {
370     default :
371     case 4 : return MEM_read32(memPtr);
372     case 3 : if (MEM_isLittleEndian())
373                 return MEM_read32(memPtr)<<8;
374              else
375                 return MEM_read32(memPtr)>>8;
376     }
377 }
378 
379 
380 /* Update hashTable3 up to ip (excluded)
381    Assumption : always within prefix (i.e. not within extDict) */
ZSTD_insertAndFindFirstIndexHash3(const ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * const ip)382 static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383                                               U32* nextToUpdate3,
384                                               const BYTE* const ip)
385 {
386     U32* const hashTable3 = ms->hashTable3;
387     U32 const hashLog3 = ms->hashLog3;
388     const BYTE* const base = ms->window.base;
389     U32 idx = *nextToUpdate3;
390     U32 const target = (U32)(ip - base);
391     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
392     assert(hashLog3 > 0);
393 
394     while(idx < target) {
395         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
396         idx++;
397     }
398 
399     *nextToUpdate3 = target;
400     return hashTable3[hash3];
401 }
402 
403 
404 /*-*************************************
405 *  Binary Tree search
406 ***************************************/
407 /* ZSTD_insertBt1() : add one or multiple positions to tree.
408  * @param ip assumed <= iend-8 .
409  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
410  * @return : nb of positions added */
ZSTD_insertBt1(const ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,U32 const target,U32 const mls,const int extDict)411 static U32 ZSTD_insertBt1(
412                 const ZSTD_matchState_t* ms,
413                 const BYTE* const ip, const BYTE* const iend,
414                 U32 const target,
415                 U32 const mls, const int extDict)
416 {
417     const ZSTD_compressionParameters* const cParams = &ms->cParams;
418     U32*   const hashTable = ms->hashTable;
419     U32    const hashLog = cParams->hashLog;
420     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
421     U32*   const bt = ms->chainTable;
422     U32    const btLog  = cParams->chainLog - 1;
423     U32    const btMask = (1 << btLog) - 1;
424     U32 matchIndex = hashTable[h];
425     size_t commonLengthSmaller=0, commonLengthLarger=0;
426     const BYTE* const base = ms->window.base;
427     const BYTE* const dictBase = ms->window.dictBase;
428     const U32 dictLimit = ms->window.dictLimit;
429     const BYTE* const dictEnd = dictBase + dictLimit;
430     const BYTE* const prefixStart = base + dictLimit;
431     const BYTE* match;
432     const U32 curr = (U32)(ip-base);
433     const U32 btLow = btMask >= curr ? 0 : curr - btMask;
434     U32* smallerPtr = bt + 2*(curr&btMask);
435     U32* largerPtr  = smallerPtr + 1;
436     U32 dummy32;   /* to be nullified at the end */
437     /* windowLow is based on target because
438      * we only need positions that will be in the window at the end of the tree update.
439      */
440     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
441     U32 matchEndIdx = curr+8+1;
442     size_t bestLength = 8;
443     U32 nbCompares = 1U << cParams->searchLog;
444 #ifdef ZSTD_C_PREDICT
445     U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
446     U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
447     predictedSmall += (predictedSmall>0);
448     predictedLarge += (predictedLarge>0);
449 #endif /* ZSTD_C_PREDICT */
450 
451     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
452 
453     assert(curr <= target);
454     assert(ip <= iend-8);   /* required for h calculation */
455     hashTable[h] = curr;   /* Update Hash Table */
456 
457     assert(windowLow > 0);
458     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
459         U32* const nextPtr = bt + 2*(matchIndex & btMask);
460         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
461         assert(matchIndex < curr);
462 
463 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
464         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
465         if (matchIndex == predictedSmall) {
466             /* no need to check length, result known */
467             *smallerPtr = matchIndex;
468             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
469             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
470             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
471             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
472             continue;
473         }
474         if (matchIndex == predictedLarge) {
475             *largerPtr = matchIndex;
476             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
477             largerPtr = nextPtr;
478             matchIndex = nextPtr[0];
479             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
480             continue;
481         }
482 #endif
483 
484         if (!extDict || (matchIndex+matchLength >= dictLimit)) {
485             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
486             match = base + matchIndex;
487             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
488         } else {
489             match = dictBase + matchIndex;
490             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
491             if (matchIndex+matchLength >= dictLimit)
492                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
493         }
494 
495         if (matchLength > bestLength) {
496             bestLength = matchLength;
497             if (matchLength > matchEndIdx - matchIndex)
498                 matchEndIdx = matchIndex + (U32)matchLength;
499         }
500 
501         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
502             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
503         }
504 
505         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
506             /* match is smaller than current */
507             *smallerPtr = matchIndex;             /* update smaller idx */
508             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
509             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
510             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
511             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
512         } else {
513             /* match is larger than current */
514             *largerPtr = matchIndex;
515             commonLengthLarger = matchLength;
516             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
517             largerPtr = nextPtr;
518             matchIndex = nextPtr[0];
519     }   }
520 
521     *smallerPtr = *largerPtr = 0;
522     {   U32 positions = 0;
523         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
524         assert(matchEndIdx > curr + 8);
525         return MAX(positions, matchEndIdx - (curr + 8));
526     }
527 }
528 
529 FORCE_INLINE_TEMPLATE
ZSTD_updateTree_internal(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,const U32 mls,const ZSTD_dictMode_e dictMode)530 void ZSTD_updateTree_internal(
531                 ZSTD_matchState_t* ms,
532                 const BYTE* const ip, const BYTE* const iend,
533                 const U32 mls, const ZSTD_dictMode_e dictMode)
534 {
535     const BYTE* const base = ms->window.base;
536     U32 const target = (U32)(ip - base);
537     U32 idx = ms->nextToUpdate;
538     DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
539                 idx, target, dictMode);
540 
541     while(idx < target) {
542         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
543         assert(idx < (U32)(idx + forward));
544         idx += forward;
545     }
546     assert((size_t)(ip - base) <= (size_t)(U32)(-1));
547     assert((size_t)(iend - base) <= (size_t)(U32)(-1));
548     ms->nextToUpdate = target;
549 }
550 
ZSTD_updateTree(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend)551 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
552     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
553 }
554 
555 FORCE_INLINE_TEMPLATE
ZSTD_insertBtAndGetAllMatches(ZSTD_match_t * matches,ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * const ip,const BYTE * const iLimit,const ZSTD_dictMode_e dictMode,const U32 rep[ZSTD_REP_NUM],U32 const ll0,const U32 lengthToBeat,U32 const mls)556 U32 ZSTD_insertBtAndGetAllMatches (
557                     ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
558                     ZSTD_matchState_t* ms,
559                     U32* nextToUpdate3,
560                     const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561                     const U32 rep[ZSTD_REP_NUM],
562                     U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563                     const U32 lengthToBeat,
564                     U32 const mls /* template */)
565 {
566     const ZSTD_compressionParameters* const cParams = &ms->cParams;
567     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
568     const BYTE* const base = ms->window.base;
569     U32 const curr = (U32)(ip-base);
570     U32 const hashLog = cParams->hashLog;
571     U32 const minMatch = (mls==3) ? 3 : 4;
572     U32* const hashTable = ms->hashTable;
573     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
574     U32 matchIndex  = hashTable[h];
575     U32* const bt   = ms->chainTable;
576     U32 const btLog = cParams->chainLog - 1;
577     U32 const btMask= (1U << btLog) - 1;
578     size_t commonLengthSmaller=0, commonLengthLarger=0;
579     const BYTE* const dictBase = ms->window.dictBase;
580     U32 const dictLimit = ms->window.dictLimit;
581     const BYTE* const dictEnd = dictBase + dictLimit;
582     const BYTE* const prefixStart = base + dictLimit;
583     U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
584     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
585     U32 const matchLow = windowLow ? windowLow : 1;
586     U32* smallerPtr = bt + 2*(curr&btMask);
587     U32* largerPtr  = bt + 2*(curr&btMask) + 1;
588     U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
589     U32 dummy32;   /* to be nullified at the end */
590     U32 mnum = 0;
591     U32 nbCompares = 1U << cParams->searchLog;
592 
593     const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594     const ZSTD_compressionParameters* const dmsCParams =
595                                       dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596     const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
597     const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
598     U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
599     U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
600     U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
601     U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
602     U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
603     U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
604     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
605 
606     size_t bestLength = lengthToBeat-1;
607     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
608 
609     /* check repCode */
610     assert(ll0 <= 1);   /* necessarily 1 or 0 */
611     {   U32 const lastR = ZSTD_REP_NUM + ll0;
612         U32 repCode;
613         for (repCode = ll0; repCode < lastR; repCode++) {
614             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
615             U32 const repIndex = curr - repOffset;
616             U32 repLen = 0;
617             assert(curr >= dictLimit);
618             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
619                 /* We must validate the repcode offset because when we're using a dictionary the
620                  * valid offset range shrinks when the dictionary goes out of bounds.
621                  */
622                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
623                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
624                 }
625             } else {  /* repIndex < dictLimit || repIndex >= curr */
626                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
627                                              dmsBase + repIndex - dmsIndexDelta :
628                                              dictBase + repIndex;
629                 assert(curr >= windowLow);
630                 if ( dictMode == ZSTD_extDict
631                   && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
632                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
633                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
635                 }
636                 if (dictMode == ZSTD_dictMatchState
637                   && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
638                      & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
639                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
641             }   }
642             /* save longer solution */
643             if (repLen > bestLength) {
644                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645                             repCode, ll0, repOffset, repLen);
646                 bestLength = repLen;
647                 matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
648                 matches[mnum].len = (U32)repLen;
649                 mnum++;
650                 if ( (repLen > sufficient_len)
651                    | (ip+repLen == iLimit) ) {  /* best possible */
652                     return mnum;
653     }   }   }   }
654 
655     /* HC3 match finder */
656     if ((mls == 3) /*static*/ && (bestLength < mls)) {
657         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
658         if ((matchIndex3 >= matchLow)
659           & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
660             size_t mlen;
661             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
662                 const BYTE* const match = base + matchIndex3;
663                 mlen = ZSTD_count(ip, match, iLimit);
664             } else {
665                 const BYTE* const match = dictBase + matchIndex3;
666                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
667             }
668 
669             /* save best solution */
670             if (mlen >= mls /* == 3 > bestLength */) {
671                 DEBUGLOG(8, "found small match with hlog3, of length %u",
672                             (U32)mlen);
673                 bestLength = mlen;
674                 assert(curr > matchIndex3);
675                 assert(mnum==0);  /* no prior solution */
676                 matches[0].off = STORE_OFFSET(curr - matchIndex3);
677                 matches[0].len = (U32)mlen;
678                 mnum = 1;
679                 if ( (mlen > sufficient_len) |
680                      (ip+mlen == iLimit) ) {  /* best possible length */
681                     ms->nextToUpdate = curr+1;  /* skip insertion */
682                     return 1;
683         }   }   }
684         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
685     }  /* if (mls == 3) */
686 
687     hashTable[h] = curr;   /* Update Hash Table */
688 
689     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
690         U32* const nextPtr = bt + 2*(matchIndex & btMask);
691         const BYTE* match;
692         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
693         assert(curr > matchIndex);
694 
695         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
696             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
697             match = base + matchIndex;
698             if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
699             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
700         } else {
701             match = dictBase + matchIndex;
702             assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
703             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
704             if (matchIndex+matchLength >= dictLimit)
705                 match = base + matchIndex;   /* prepare for match[matchLength] read */
706         }
707 
708         if (matchLength > bestLength) {
709             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710                     (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
711             assert(matchEndIdx > matchIndex);
712             if (matchLength > matchEndIdx - matchIndex)
713                 matchEndIdx = matchIndex + (U32)matchLength;
714             bestLength = matchLength;
715             matches[mnum].off = STORE_OFFSET(curr - matchIndex);
716             matches[mnum].len = (U32)matchLength;
717             mnum++;
718             if ( (matchLength > ZSTD_OPT_NUM)
719                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
721                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
722         }   }
723 
724         if (match[matchLength] < ip[matchLength]) {
725             /* match smaller than current */
726             *smallerPtr = matchIndex;             /* update smaller idx */
727             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
728             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
729             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
730             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
731         } else {
732             *largerPtr = matchIndex;
733             commonLengthLarger = matchLength;
734             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
735             largerPtr = nextPtr;
736             matchIndex = nextPtr[0];
737     }   }
738 
739     *smallerPtr = *largerPtr = 0;
740 
741     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
742     if (dictMode == ZSTD_dictMatchState && nbCompares) {
743         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
744         U32 dictMatchIndex = dms->hashTable[dmsH];
745         const U32* const dmsBt = dms->chainTable;
746         commonLengthSmaller = commonLengthLarger = 0;
747         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
748             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
749             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
750             const BYTE* match = dmsBase + dictMatchIndex;
751             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
752             if (dictMatchIndex+matchLength >= dmsHighLimit)
753                 match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
754 
755             if (matchLength > bestLength) {
756                 matchIndex = dictMatchIndex + dmsIndexDelta;
757                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758                         (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
759                 if (matchLength > matchEndIdx - matchIndex)
760                     matchEndIdx = matchIndex + (U32)matchLength;
761                 bestLength = matchLength;
762                 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
763                 matches[mnum].len = (U32)matchLength;
764                 mnum++;
765                 if ( (matchLength > ZSTD_OPT_NUM)
766                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
767                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
768             }   }
769 
770             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
771             if (match[matchLength] < ip[matchLength]) {
772                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
773                 dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
774             } else {
775                 /* match is larger than current */
776                 commonLengthLarger = matchLength;
777                 dictMatchIndex = nextPtr[0];
778     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
779 
780     assert(matchEndIdx > curr+8);
781     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
782     return mnum;
783 }
784 
785 typedef U32 (*ZSTD_getAllMatchesFn)(
786     ZSTD_match_t*,
787     ZSTD_matchState_t*,
788     U32*,
789     const BYTE*,
790     const BYTE*,
791     const U32 rep[ZSTD_REP_NUM],
792     U32 const ll0,
793     U32 const lengthToBeat);
794 
ZSTD_btGetAllMatches_internal(ZSTD_match_t * matches,ZSTD_matchState_t * ms,U32 * nextToUpdate3,const BYTE * ip,const BYTE * const iHighLimit,const U32 rep[ZSTD_REP_NUM],U32 const ll0,U32 const lengthToBeat,const ZSTD_dictMode_e dictMode,const U32 mls)795 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
796         ZSTD_match_t* matches,
797         ZSTD_matchState_t* ms,
798         U32* nextToUpdate3,
799         const BYTE* ip,
800         const BYTE* const iHighLimit,
801         const U32 rep[ZSTD_REP_NUM],
802         U32 const ll0,
803         U32 const lengthToBeat,
804         const ZSTD_dictMode_e dictMode,
805         const U32 mls)
806 {
807     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
808     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
809     if (ip < ms->window.base + ms->nextToUpdate)
810         return 0;   /* skipped area */
811     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
812     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
813 }
814 
815 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
816 
817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
818     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
819             ZSTD_match_t* matches,                             \
820             ZSTD_matchState_t* ms,                             \
821             U32* nextToUpdate3,                                \
822             const BYTE* ip,                                    \
823             const BYTE* const iHighLimit,                      \
824             const U32 rep[ZSTD_REP_NUM],                       \
825             U32 const ll0,                                     \
826             U32 const lengthToBeat)                            \
827     {                                                          \
828         return ZSTD_btGetAllMatches_internal(                  \
829                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
830                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
831     }
832 
833 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
834     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
835     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
836     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
837     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
838 
839 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)840 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
841 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
842 
843 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
844     {                                            \
845         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
846         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
847         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
848         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
849     }
850 
851 static ZSTD_getAllMatchesFn
852 ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
853 {
854     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
856         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
857         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
858     };
859     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
860     assert((U32)dictMode < 3);
861     assert(mls - 3 < 4);
862     return getAllMatchesFns[(int)dictMode][mls - 3];
863 }
864 
865 /* ***********************
866 *  LDM helper functions  *
867 *************************/
868 
869 /* Struct containing info needed to make decision about ldm inclusion */
870 typedef struct {
871     rawSeqStore_t seqStore;   /* External match candidates store for this block */
872     U32 startPosInBlock;      /* Start position of the current match candidate */
873     U32 endPosInBlock;        /* End position of the current match candidate */
874     U32 offset;               /* Offset of the match candidate */
875 } ZSTD_optLdm_t;
876 
877 /* ZSTD_optLdm_skipRawSeqStoreBytes():
878  * Moves forward in @rawSeqStore by @nbBytes,
879  * which will update the fields 'pos' and 'posInSequence'.
880  */
ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t * rawSeqStore,size_t nbBytes)881 static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
882 {
883     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
885         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
886         if (currPos >= currSeq.litLength + currSeq.matchLength) {
887             currPos -= currSeq.litLength + currSeq.matchLength;
888             rawSeqStore->pos++;
889         } else {
890             rawSeqStore->posInSequence = currPos;
891             break;
892         }
893     }
894     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
895         rawSeqStore->posInSequence = 0;
896     }
897 }
898 
899 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
900  * Calculates the beginning and end of the next match in the current block.
901  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
902  */
903 static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t * optLdm,U32 currPosInBlock,U32 blockBytesRemaining)904 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
905                                        U32 blockBytesRemaining)
906 {
907     rawSeq currSeq;
908     U32 currBlockEndPos;
909     U32 literalsBytesRemaining;
910     U32 matchBytesRemaining;
911 
912     /* Setting match end position to MAX to ensure we never use an LDM during this block */
913     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
914         optLdm->startPosInBlock = UINT_MAX;
915         optLdm->endPosInBlock = UINT_MAX;
916         return;
917     }
918     /* Calculate appropriate bytes left in matchLength and litLength
919      * after adjusting based on ldmSeqStore->posInSequence */
920     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
921     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
922     currBlockEndPos = currPosInBlock + blockBytesRemaining;
923     literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
924             currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
925             0;
926     matchBytesRemaining = (literalsBytesRemaining == 0) ?
927             currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
928             currSeq.matchLength;
929 
930     /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
931     if (literalsBytesRemaining >= blockBytesRemaining) {
932         optLdm->startPosInBlock = UINT_MAX;
933         optLdm->endPosInBlock = UINT_MAX;
934         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
935         return;
936     }
937 
938     /* Matches may be < MINMATCH by this process. In that case, we will reject them
939        when we are deciding whether or not to add the ldm */
940     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
942     optLdm->offset = currSeq.offset;
943 
944     if (optLdm->endPosInBlock > currBlockEndPos) {
945         /* Match ends after the block ends, we can't use the whole match */
946         optLdm->endPosInBlock = currBlockEndPos;
947         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
948     } else {
949         /* Consume nb of bytes equal to size of sequence left */
950         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
951     }
952 }
953 
954 /* ZSTD_optLdm_maybeAddMatch():
955  * Adds a match if it's long enough,
956  * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
957  * into 'matches'. Maintains the correct ordering of 'matches'.
958  */
ZSTD_optLdm_maybeAddMatch(ZSTD_match_t * matches,U32 * nbMatches,const ZSTD_optLdm_t * optLdm,U32 currPosInBlock)959 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
961 {
962     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
964     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
965 
966     /* Ensure that current block position is not outside of the match */
967     if (currPosInBlock < optLdm->startPosInBlock
968       || currPosInBlock >= optLdm->endPosInBlock
969       || candidateMatchLength < MINMATCH) {
970         return;
971     }
972 
973     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974         U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976                  candidateOffCode, candidateMatchLength, currPosInBlock);
977         matches[*nbMatches].len = candidateMatchLength;
978         matches[*nbMatches].off = candidateOffCode;
979         (*nbMatches)++;
980     }
981 }
982 
983 /* ZSTD_optLdm_processMatchCandidate():
984  * Wrapper function to update ldm seq store and call ldm functions as necessary.
985  */
986 static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t * optLdm,ZSTD_match_t * matches,U32 * nbMatches,U32 currPosInBlock,U32 remainingBytes)987 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988                                   ZSTD_match_t* matches, U32* nbMatches,
989                                   U32 currPosInBlock, U32 remainingBytes)
990 {
991     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992         return;
993     }
994 
995     if (currPosInBlock >= optLdm->endPosInBlock) {
996         if (currPosInBlock > optLdm->endPosInBlock) {
997             /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
998              * at the end of a match from the ldm seq store, and will often be some bytes
999              * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1000              */
1001             U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1002             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1003         }
1004         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1005     }
1006     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1007 }
1008 
1009 
1010 /*-*******************************
1011 *  Optimal parser
1012 *********************************/
1013 
ZSTD_totalLen(ZSTD_optimal_t sol)1014 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1015 {
1016     return sol.litlen + sol.mlen;
1017 }
1018 
1019 #if 0 /* debug */
1020 
1021 static void
1022 listStats(const U32* table, int lastEltID)
1023 {
1024     int const nbElts = lastEltID + 1;
1025     int enb;
1026     for (enb=0; enb < nbElts; enb++) {
1027         (void)table;
1028         /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1029         RAWLOG(2, "%4i,", table[enb]);
1030     }
1031     RAWLOG(2, " \n");
1032 }
1033 
1034 #endif
1035 
1036 FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const int optLevel,const ZSTD_dictMode_e dictMode)1037 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038                                seqStore_t* seqStore,
1039                                U32 rep[ZSTD_REP_NUM],
1040                          const void* src, size_t srcSize,
1041                          const int optLevel,
1042                          const ZSTD_dictMode_e dictMode)
1043 {
1044     optState_t* const optStatePtr = &ms->opt;
1045     const BYTE* const istart = (const BYTE*)src;
1046     const BYTE* ip = istart;
1047     const BYTE* anchor = istart;
1048     const BYTE* const iend = istart + srcSize;
1049     const BYTE* const ilimit = iend - 8;
1050     const BYTE* const base = ms->window.base;
1051     const BYTE* const prefixStart = base + ms->window.dictLimit;
1052     const ZSTD_compressionParameters* const cParams = &ms->cParams;
1053 
1054     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1055 
1056     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1057     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1058     U32 nextToUpdate3 = ms->nextToUpdate;
1059 
1060     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061     ZSTD_match_t* const matches = optStatePtr->matchTable;
1062     ZSTD_optimal_t lastSequence;
1063     ZSTD_optLdm_t optLdm;
1064 
1065     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1068 
1069     /* init */
1070     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1071                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1072     assert(optLevel <= 2);
1073     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1074     ip += (ip==prefixStart);
1075 
1076     /* Match Loop */
1077     while (ip < ilimit) {
1078         U32 cur, last_pos = 0;
1079 
1080         /* find first match */
1081         {   U32 const litlen = (U32)(ip - anchor);
1082             U32 const ll0 = !litlen;
1083             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085                                               (U32)(ip-istart), (U32)(iend - ip));
1086             if (!nbMatches) { ip++; continue; }
1087 
1088             /* initialize opt[0] */
1089             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090             opt[0].mlen = 0;  /* means is_a_literal */
1091             opt[0].litlen = litlen;
1092             /* We don't need to include the actual price of the literals because
1093              * it is static for the duration of the forward pass, and is included
1094              * in every price. We include the literal length to avoid negative
1095              * prices when we subtract the previous literal length.
1096              */
1097             opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1098 
1099             /* large match -> immediate encoding */
1100             {   U32 const maxML = matches[nbMatches-1].len;
1101                 U32 const maxOffcode = matches[nbMatches-1].off;
1102                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103                             nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1104 
1105                 if (maxML > sufficient_len) {
1106                     lastSequence.litlen = litlen;
1107                     lastSequence.mlen = maxML;
1108                     lastSequence.off = maxOffcode;
1109                     DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110                                 maxML, sufficient_len);
1111                     cur = 0;
1112                     last_pos = ZSTD_totalLen(lastSequence);
1113                     goto _shortestPath;
1114             }   }
1115 
1116             /* set prices for first matches starting position == 0 */
1117             assert(opt[0].price >= 0);
1118             {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119                 U32 pos;
1120                 U32 matchNb;
1121                 for (pos = 1; pos < minMatch; pos++) {
1122                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
1123                 }
1124                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125                     U32 const offcode = matches[matchNb].off;
1126                     U32 const end = matches[matchNb].len;
1127                     for ( ; pos <= end ; pos++ ) {
1128                         U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129                         U32 const sequencePrice = literalsPrice + matchPrice;
1130                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131                                     pos, ZSTD_fCost(sequencePrice));
1132                         opt[pos].mlen = pos;
1133                         opt[pos].off = offcode;
1134                         opt[pos].litlen = litlen;
1135                         opt[pos].price = (int)sequencePrice;
1136                 }   }
1137                 last_pos = pos-1;
1138             }
1139         }
1140 
1141         /* check further positions */
1142         for (cur = 1; cur <= last_pos; cur++) {
1143             const BYTE* const inr = ip + cur;
1144             assert(cur < ZSTD_OPT_NUM);
1145             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1146 
1147             /* Fix current position with one literal if cheaper */
1148             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149                 int const price = opt[cur-1].price
1150                                 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151                                 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152                                 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153                 assert(price < 1000000000); /* overflow check */
1154                 if (price <= opt[cur].price) {
1155                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158                     opt[cur].mlen = 0;
1159                     opt[cur].off = 0;
1160                     opt[cur].litlen = litlen;
1161                     opt[cur].price = price;
1162                 } else {
1163                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165                                 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1166                 }
1167             }
1168 
1169             /* Set the repcodes of the current position. We must do it here
1170              * because we rely on the repcodes of the 2nd to last sequence being
1171              * correct to set the next chunks repcodes during the backward
1172              * traversal.
1173              */
1174             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1175             assert(cur >= opt[cur].mlen);
1176             if (opt[cur].mlen != 0) {
1177                 U32 const prev = cur - opt[cur].mlen;
1178                 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180             } else {
1181                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1182             }
1183 
1184             /* last match must start at a minimum distance of 8 from oend */
1185             if (inr > ilimit) continue;
1186 
1187             if (cur == last_pos) break;
1188 
1189             if ( (optLevel==0) /*static_test*/
1190               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191                 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1192                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1193             }
1194 
1195             assert(opt[cur].price >= 0);
1196             {   U32 const ll0 = (opt[cur].mlen != 0);
1197                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198                 U32 const previousPrice = (U32)opt[cur].price;
1199                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1200                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201                 U32 matchNb;
1202 
1203                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204                                                   (U32)(inr-istart), (U32)(iend-inr));
1205 
1206                 if (!nbMatches) {
1207                     DEBUGLOG(7, "rPos:%u : no match found", cur);
1208                     continue;
1209                 }
1210 
1211                 {   U32 const maxML = matches[nbMatches-1].len;
1212                     DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213                                 inr-istart, cur, nbMatches, maxML);
1214 
1215                     if ( (maxML > sufficient_len)
1216                       || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217                         lastSequence.mlen = maxML;
1218                         lastSequence.off = matches[nbMatches-1].off;
1219                         lastSequence.litlen = litlen;
1220                         cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221                         last_pos = cur + ZSTD_totalLen(lastSequence);
1222                         if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
1223                         goto _shortestPath;
1224                 }   }
1225 
1226                 /* set prices using matches found at position == cur */
1227                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1228                     U32 const offset = matches[matchNb].off;
1229                     U32 const lastML = matches[matchNb].len;
1230                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231                     U32 mlen;
1232 
1233                     DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234                                 matchNb, matches[matchNb].off, lastML, litlen);
1235 
1236                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1237                         U32 const pos = cur + mlen;
1238                         int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1239 
1240                         if ((pos > last_pos) || (price < opt[pos].price)) {
1241                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243                             while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
1244                             opt[pos].mlen = mlen;
1245                             opt[pos].off = offset;
1246                             opt[pos].litlen = litlen;
1247                             opt[pos].price = price;
1248                         } else {
1249                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1250                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1251                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1252                         }
1253             }   }   }
1254         }  /* for (cur = 1; cur <= last_pos; cur++) */
1255 
1256         lastSequence = opt[last_pos];
1257         cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
1258         assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
1259 
1260 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1261         assert(opt[0].mlen == 0);
1262 
1263         /* Set the next chunk's repcodes based on the repcodes of the beginning
1264          * of the last match, and the last sequence. This avoids us having to
1265          * update them while traversing the sequences.
1266          */
1267         if (lastSequence.mlen != 0) {
1268             repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269             ZSTD_memcpy(rep, &reps, sizeof(reps));
1270         } else {
1271             ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1272         }
1273 
1274         {   U32 const storeEnd = cur + 1;
1275             U32 storeStart = storeEnd;
1276             U32 seqPos = cur;
1277 
1278             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279                         last_pos, cur); (void)last_pos;
1280             assert(storeEnd < ZSTD_OPT_NUM);
1281             DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282                         storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283             opt[storeEnd] = lastSequence;
1284             while (seqPos > 0) {
1285                 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1286                 storeStart--;
1287                 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288                             seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289                 opt[storeStart] = opt[seqPos];
1290                 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1291             }
1292 
1293             /* save sequences */
1294             DEBUGLOG(6, "sending selected sequences into seqStore")
1295             {   U32 storePos;
1296                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297                     U32 const llen = opt[storePos].litlen;
1298                     U32 const mlen = opt[storePos].mlen;
1299                     U32 const offCode = opt[storePos].off;
1300                     U32 const advance = llen + mlen;
1301                     DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302                                 anchor - istart, (unsigned)llen, (unsigned)mlen);
1303 
1304                     if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305                         assert(storePos == storeEnd);   /* must be last sequence */
1306                         ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1307                         continue;   /* will finish */
1308                     }
1309 
1310                     assert(anchor + llen <= iend);
1311                     ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312                     ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1313                     anchor += advance;
1314                     ip = anchor;
1315             }   }
1316             ZSTD_setBasePrices(optStatePtr, optLevel);
1317         }
1318     }   /* while (ip < ilimit) */
1319 
1320     /* Return the last literals size */
1321     return (size_t)(iend - anchor);
1322 }
1323 
ZSTD_compressBlock_opt0(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const ZSTD_dictMode_e dictMode)1324 static size_t ZSTD_compressBlock_opt0(
1325         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1327 {
1328     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1329 }
1330 
ZSTD_compressBlock_opt2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const ZSTD_dictMode_e dictMode)1331 static size_t ZSTD_compressBlock_opt2(
1332         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1334 {
1335     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1336 }
1337 
ZSTD_compressBlock_btopt(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1338 size_t ZSTD_compressBlock_btopt(
1339         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340         const void* src, size_t srcSize)
1341 {
1342     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1344 }
1345 
1346 
1347 
1348 
1349 /* ZSTD_initStats_ultra():
1350  * make a first compression pass, just to seed stats with more accurate starting values.
1351  * only works on first block, with no dictionary and no ldm.
1352  * this function cannot error, hence its contract must be respected.
1353  */
1354 static void
ZSTD_initStats_ultra(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1355 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356                      seqStore_t* seqStore,
1357                      U32 rep[ZSTD_REP_NUM],
1358                const void* src, size_t srcSize)
1359 {
1360     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1361     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1362 
1363     DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1364     assert(ms->opt.litLengthSum == 0);    /* first block */
1365     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1366     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1367     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1368 
1369     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1370 
1371     /* invalidate first scan from history */
1372     ZSTD_resetSeqStore(seqStore);
1373     ms->window.base -= srcSize;
1374     ms->window.dictLimit += (U32)srcSize;
1375     ms->window.lowLimit = ms->window.dictLimit;
1376     ms->nextToUpdate = ms->window.dictLimit;
1377 
1378 }
1379 
ZSTD_compressBlock_btultra(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1380 size_t ZSTD_compressBlock_btultra(
1381         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382         const void* src, size_t srcSize)
1383 {
1384     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1385     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1386 }
1387 
ZSTD_compressBlock_btultra2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1388 size_t ZSTD_compressBlock_btultra2(
1389         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390         const void* src, size_t srcSize)
1391 {
1392     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1394 
1395     /* 2-pass strategy:
1396      * this strategy makes a first pass over first block to collect statistics
1397      * and seed next round's statistics with it.
1398      * After 1st pass, function forgets everything, and starts a new block.
1399      * Consequently, this can only work if no data has been previously loaded in tables,
1400      * aka, no dictionary, no prefix, no ldm preprocessing.
1401      * The compression ratio gain is generally small (~0.5% on first block),
1402      * the cost is 2x cpu time on first block. */
1403     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404     if ( (ms->opt.litLengthSum==0)   /* first block */
1405       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1406       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1407       && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
1408       && (srcSize > ZSTD_PREDEF_THRESHOLD)
1409       ) {
1410         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1411     }
1412 
1413     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1414 }
1415 
ZSTD_compressBlock_btopt_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1416 size_t ZSTD_compressBlock_btopt_dictMatchState(
1417         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418         const void* src, size_t srcSize)
1419 {
1420     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1421 }
1422 
ZSTD_compressBlock_btultra_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1423 size_t ZSTD_compressBlock_btultra_dictMatchState(
1424         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425         const void* src, size_t srcSize)
1426 {
1427     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1428 }
1429 
ZSTD_compressBlock_btopt_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1430 size_t ZSTD_compressBlock_btopt_extDict(
1431         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432         const void* src, size_t srcSize)
1433 {
1434     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1435 }
1436 
ZSTD_compressBlock_btultra_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize)1437 size_t ZSTD_compressBlock_btultra_extDict(
1438         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439         const void* src, size_t srcSize)
1440 {
1441     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1442 }
1443 
1444 /* note : no btultra2 variant for extDict nor dictMatchState,
1445  * because btultra2 is not meant to work with dictionaries
1446  * and is only specific for the first block (no prefix) */
1447