xref: /linux/lib/zstd/compress/zstd_opt.c (revision e61f33273ca755b3e2ebee4520a76097199dc7a8)
1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3  * Copyright (c) Meta Platforms, Inc. and affiliates.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 #include "zstd_compress_internal.h"
13 #include "hist.h"
14 #include "zstd_opt.h"
15 
16 #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
17  || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
18  || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
19 
20 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
21 #define ZSTD_MAX_PRICE     (1<<30)
22 
23 #define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
24 
25 
26 /*-*************************************
27 *  Price functions for optimal parser
28 ***************************************/
29 
30 #if 0    /* approximation at bit level (for tests) */
31 #  define BITCOST_ACCURACY 0
32 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 #  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
34 #elif 0  /* fractional bit accuracy (for tests) */
35 #  define BITCOST_ACCURACY 8
36 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 #  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
38 #else    /* opt==approx, ultra==accurate */
39 #  define BITCOST_ACCURACY 8
40 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
41 #  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
42 #endif
43 
44 /* ZSTD_bitWeight() :
45  * provide estimated "cost" of a stat in full bits only */
46 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
47 {
48     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
49 }
50 
51 /* ZSTD_fracWeight() :
52  * provide fractional-bit "cost" of a stat,
53  * using linear interpolation approximation */
54 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
55 {
56     U32 const stat = rawStat + 1;
57     U32 const hb = ZSTD_highbit32(stat);
58     U32 const BWeight = hb * BITCOST_MULTIPLIER;
59     /* FWeight stands for "Fractional weight",
60      * but it's effectively a value between 1 and 2,
61      * expressed in fixed point arithmetic */
62     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
63     U32 const weight = BWeight + FWeight;
64     assert(hb + BITCOST_ACCURACY < 31);
65     return weight;
66 }
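/* Worked example (editorial note, not part of the original source):
 * with BITCOST_ACCURACY == 8, all prices are expressed in 1/256th-of-a-bit units.
 * ZSTD_fracWeight(5) evaluates to:
 *   stat    = 5 + 1             = 6
 *   hb      = ZSTD_highbit32(6) = 2
 *   BWeight = 2 << 8            = 512
 *   FWeight = (6 << 8) >> 2     = 384
 *   weight  = 512 + 384         = 896   (~3.5 "bits")
 * close to the exact log2(6) + 1 ~= 3.585 bits. Since prices are computed as
 * differences of weights (see ZSTD_setBasePrices()), the constant +1 offset
 * carried by FWeight largely cancels out. */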
67 
68 #if (DEBUGLEVEL>=2)
69 /* debugging function,
70  * @return price in bytes as fractional value
71  * for debug messages only */
72 MEM_STATIC double ZSTD_fCost(int price)
73 {
74     return (double)price / (BITCOST_MULTIPLIER*8);
75 }
76 #endif
77 
78 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
79 {
80     return optPtr->literalCompressionMode != ZSTD_ps_disable;
81 }
82 
83 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
84 {
85     if (ZSTD_compressedLiterals(optPtr))
86         optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
87     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
88     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
89     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
90 }
91 
92 
93 static U32 sum_u32(const unsigned table[], size_t nbElts)
94 {
95     size_t n;
96     U32 total = 0;
97     for (n=0; n<nbElts; n++) {
98         total += table[n];
99     }
100     return total;
101 }
102 
103 typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
104 
105 static U32
106 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
107 {
108     U32 s, sum=0;
109     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
110             (unsigned)lastEltIndex+1, (unsigned)shift );
111     assert(shift < 30);
112     for (s=0; s<lastEltIndex+1; s++) {
113         unsigned const base = base1 ? 1 : (table[s]>0);
114         unsigned const newStat = base + (table[s] >> shift);
115         sum += newStat;
116         table[s] = newStat;
117     }
118     return sum;
119 }
120 
121 /* ZSTD_scaleStats() :
122  * reduce all elt frequencies in table if sum too large
123  * return the resulting sum of elements */
124 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
125 {
126     U32 const prevsum = sum_u32(table, lastEltIndex+1);
127     U32 const factor = prevsum >> logTarget;
128     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
129     assert(logTarget < 30);
130     if (factor <= 1) return prevsum;
131     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
132 }
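/* Worked example (editorial note): with logTarget == 11 (target sum ~2K)
 * and a previous sum of 10000:
 *   factor = 10000 >> 11       = 4
 *   shift  = ZSTD_highbit32(4) = 2
 * so every frequency becomes 1 + (freq >> 2): the total weight shrinks roughly
 * 4x while base_1guaranteed keeps every symbol's frequency non-zero.
 * When prevsum < 2 * (1 << logTarget), factor <= 1 and stats are left untouched. */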
133 
134 /* ZSTD_rescaleFreqs() :
135  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
136  *    take hints from dictionary if there is one
137  *    and init from zero if there is none,
138  *    using src for literals stats, and baseline stats for sequence symbols
139  * otherwise downscale existing stats, to be used as seed for next block.
140  */
141 static void
142 ZSTD_rescaleFreqs(optState_t* const optPtr,
143             const BYTE* const src, size_t const srcSize,
144                   int const optLevel)
145 {
146     int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
147     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
148     optPtr->priceType = zop_dynamic;
149 
150     if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
151 
152         /* heuristic: use pre-defined stats for inputs too small to collect meaningful statistics */
153         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
154             DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
155             optPtr->priceType = zop_predef;
156         }
157 
158         assert(optPtr->symbolCosts != NULL);
159         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
160 
161             /* huffman stats covering the full value set : table presumed generated by dictionary */
162             optPtr->priceType = zop_dynamic;
163 
164             if (compressedLiterals) {
165                 /* generate literals statistics from huffman table */
166                 unsigned lit;
167                 assert(optPtr->litFreq != NULL);
168                 optPtr->litSum = 0;
169                 for (lit=0; lit<=MaxLit; lit++) {
170                     U32 const scaleLog = 11;   /* scale to 2K */
171                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
172                     assert(bitCost <= scaleLog);
173                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
174                     optPtr->litSum += optPtr->litFreq[lit];
175             }   }
176 
177             {   unsigned ll;
178                 FSE_CState_t llstate;
179                 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
180                 optPtr->litLengthSum = 0;
181                 for (ll=0; ll<=MaxLL; ll++) {
182                     U32 const scaleLog = 10;   /* scale to 1K */
183                     U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
184                     assert(bitCost < scaleLog);
185                     optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
186                     optPtr->litLengthSum += optPtr->litLengthFreq[ll];
187             }   }
188 
189             {   unsigned ml;
190                 FSE_CState_t mlstate;
191                 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
192                 optPtr->matchLengthSum = 0;
193                 for (ml=0; ml<=MaxML; ml++) {
194                     U32 const scaleLog = 10;
195                     U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
196                     assert(bitCost < scaleLog);
197                     optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
198                     optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
199             }   }
200 
201             {   unsigned of;
202                 FSE_CState_t ofstate;
203                 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
204                 optPtr->offCodeSum = 0;
205                 for (of=0; of<=MaxOff; of++) {
206                     U32 const scaleLog = 10;
207                     U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
208                     assert(bitCost < scaleLog);
209                     optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
210                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
211             }   }
212 
213         } else {  /* first block, no dictionary */
214 
215             assert(optPtr->litFreq != NULL);
216             if (compressedLiterals) {
217                 /* base initial cost of literals on direct frequency within src */
218                 unsigned lit = MaxLit;
219                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
220                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
221             }
222 
223             {   unsigned const baseLLfreqs[MaxLL+1] = {
224                     4, 2, 1, 1, 1, 1, 1, 1,
225                     1, 1, 1, 1, 1, 1, 1, 1,
226                     1, 1, 1, 1, 1, 1, 1, 1,
227                     1, 1, 1, 1, 1, 1, 1, 1,
228                     1, 1, 1, 1
229                 };
230                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
231                 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
232             }
233 
234             {   unsigned ml;
235                 for (ml=0; ml<=MaxML; ml++)
236                     optPtr->matchLengthFreq[ml] = 1;
237             }
238             optPtr->matchLengthSum = MaxML+1;
239 
240             {   unsigned const baseOFCfreqs[MaxOff+1] = {
241                     6, 2, 1, 1, 2, 3, 4, 4,
242                     4, 3, 2, 1, 1, 1, 1, 1,
243                     1, 1, 1, 1, 1, 1, 1, 1,
244                     1, 1, 1, 1, 1, 1, 1, 1
245                 };
246                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
247                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
248             }
249 
250         }
251 
252     } else {   /* new block : scale down accumulated statistics */
253 
254         if (compressedLiterals)
255             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
256         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
257         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
258         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
259     }
260 
261     ZSTD_setBasePrices(optPtr, optLevel);
262 }
263 
264 /* ZSTD_rawLiteralsCost() :
265  * price of literals (only) in specified segment (whose length can be 0).
266  * does not include price of literalLength symbol */
267 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
268                                 const optState_t* const optPtr,
269                                 int optLevel)
270 {
271     DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
272     if (litLength == 0) return 0;
273 
274     if (!ZSTD_compressedLiterals(optPtr))
275         return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits (1 byte) per literal. */
276 
277     if (optPtr->priceType == zop_predef)
278         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistics used */
279 
280     /* dynamic statistics */
281     {   U32 price = optPtr->litSumBasePrice * litLength;
282         U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
283         U32 u;
284         assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
285         for (u=0; u < litLength; u++) {
286             U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
287             if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
288             price -= litPrice;
289         }
290         return price;
291     }
292 }
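/* Editorial note: under dynamic statistics, each literal c is priced at
 * roughly WEIGHT(litSum) - WEIGHT(litFreq[c]) ~= log2(litSum / litFreq[c])
 * fractional bits, i.e. its Shannon self-information under the currently
 * observed distribution. The clamp to litPriceMax guarantees a minimum cost
 * of 1 bit (BITCOST_MULTIPLIER) per literal, so @price cannot underflow. */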
293 
294 /* ZSTD_litLengthPrice() :
295  * cost of literalLength symbol */
296 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
297 {
298     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
299     if (optPtr->priceType == zop_predef)
300         return WEIGHT(litLength, optLevel);
301 
302     /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
303      * because it isn't representable in the zstd format.
304      * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
305      * In such a case, the block would be all literals.
306      */
307     if (litLength == ZSTD_BLOCKSIZE_MAX)
308         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
309 
310     /* dynamic statistics */
311     {   U32 const llCode = ZSTD_LLcode(litLength);
312         return (LL_bits[llCode] * BITCOST_MULTIPLIER)
313              + optPtr->litLengthSumBasePrice
314              - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
315     }
316 }
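/* Editorial note: the dynamic branch above mirrors how the symbol would be
 * entropy-coded: LL_bits[llCode] raw extra bits, plus an estimated symbol
 * cost of litLengthSumBasePrice - WEIGHT(freq) ~= log2(litLengthSum / freq)
 * fractional bits for the literal-length code itself. */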
317 
318 /* ZSTD_getMatchPrice() :
319  * Provides the cost of the match part (offset + matchLength) of a sequence.
320  * Must be combined with the literals cost (see ZSTD_rawLiteralsCost() and ZSTD_litLengthPrice()) to get the full cost of a sequence.
321  * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
322  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
323  */
324 FORCE_INLINE_TEMPLATE U32
325 ZSTD_getMatchPrice(U32 const offBase,
326                    U32 const matchLength,
327              const optState_t* const optPtr,
328                    int const optLevel)
329 {
330     U32 price;
331     U32 const offCode = ZSTD_highbit32(offBase);
332     U32 const mlBase = matchLength - MINMATCH;
333     assert(matchLength >= MINMATCH);
334 
335     if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
336         return WEIGHT(mlBase, optLevel)
337              + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
338 
339     /* dynamic statistics */
340     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
341     if ((optLevel<2) /*static*/ && offCode >= 20)
342         price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
343 
344     /* match Length */
345     {   U32 const mlCode = ZSTD_MLcode(mlBase);
346         price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
347     }
348 
349     price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
350 
351     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
352     return price;
353 }
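/* Editorial note: offCode == ZSTD_highbit32(offBase) doubles as both the
 * offset code and its number of extra bits, hence the dynamic offset price of
 * offCode + log2(offCodeSum / offCodeFreq[offCode]) fractional bits.
 * The two adjustments on top are speed heuristics rather than entropy
 * estimates: the offCode >= 20 handicap steers low optLevels toward
 * cache-friendly short distances, and the flat +0.2 bit (BITCOST_MULTIPLIER/5)
 * per match nudges the parser toward fewer, longer sequences. */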
354 
355 /* ZSTD_updateStats() :
356  * assumption : literals + litLength <= iend */
357 static void ZSTD_updateStats(optState_t* const optPtr,
358                              U32 litLength, const BYTE* literals,
359                              U32 offBase, U32 matchLength)
360 {
361     /* literals */
362     if (ZSTD_compressedLiterals(optPtr)) {
363         U32 u;
364         for (u=0; u < litLength; u++)
365             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
366         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
367     }
368 
369     /* literal Length */
370     {   U32 const llCode = ZSTD_LLcode(litLength);
371         optPtr->litLengthFreq[llCode]++;
372         optPtr->litLengthSum++;
373     }
374 
375     /* offset code : follows storeSeq() numeric representation */
376     {   U32 const offCode = ZSTD_highbit32(offBase);
377         assert(offCode <= MaxOff);
378         optPtr->offCodeFreq[offCode]++;
379         optPtr->offCodeSum++;
380     }
381 
382     /* match Length */
383     {   U32 const mlBase = matchLength - MINMATCH;
384         U32 const mlCode = ZSTD_MLcode(mlBase);
385         optPtr->matchLengthFreq[mlCode]++;
386         optPtr->matchLengthSum++;
387     }
388 }
389 
390 
391 /* ZSTD_readMINMATCH() :
392  * function safe only for comparisons
393  * assumption : memPtr must be at least 4 bytes before end of buffer */
394 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
395 {
396     switch (length)
397     {
398     default :
399     case 4 : return MEM_read32(memPtr);
400     case 3 : if (MEM_isLittleEndian())
401                 return MEM_read32(memPtr)<<8;
402              else
403                 return MEM_read32(memPtr)>>8;
404     }
405 }
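/* Byte-level illustration (editorial note): for length==3, the 4th byte read
 * must not influence the comparison. With bytes b0 b1 b2 b3 at memPtr:
 *   little-endian : read32 == (b3<<24)|(b2<<16)|(b1<<8)|b0 ; <<8 discards b3
 *   big-endian    : read32 == (b0<<24)|(b1<<16)|(b2<<8)|b3 ; >>8 discards b3
 * Either way only b0..b2 remain, in shifted (non-canonical) positions,
 * which is why the result is safe for comparisons only. */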
406 
407 
408 /* Update hashTable3 up to ip (excluded)
409    Assumption : always within prefix (i.e. not within extDict) */
410 static
411 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
412 U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
413                                        U32* nextToUpdate3,
414                                        const BYTE* const ip)
415 {
416     U32* const hashTable3 = ms->hashTable3;
417     U32 const hashLog3 = ms->hashLog3;
418     const BYTE* const base = ms->window.base;
419     U32 idx = *nextToUpdate3;
420     U32 const target = (U32)(ip - base);
421     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
422     assert(hashLog3 > 0);
423 
424     while(idx < target) {
425         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
426         idx++;
427     }
428 
429     *nextToUpdate3 = target;
430     return hashTable3[hash3];
431 }
432 
433 
434 /*-*************************************
435 *  Binary Tree search
436 ***************************************/
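/* Layout note (editorial): ms->chainTable is reused here as a binary tree of
 * prior positions sorted by suffix. Each position owns two U32 links:
 * bt[2*(pos & btMask)] roots the subtree of lexicographically smaller
 * suffixes, bt[2*(pos & btMask) + 1] the subtree of larger ones.
 * commonLengthSmaller / commonLengthLarger carry the prefix length already
 * proven equal along the descent, so each step only compares bytes beyond
 * the guaranteed common prefix. */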
437 /* ZSTD_insertBt1() : add one or multiple positions to tree.
438  * @param ip assumed <= iend-8 .
439  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
440  * @return : nb of positions added */
441 static
442 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
443 U32 ZSTD_insertBt1(
444                 const ZSTD_MatchState_t* ms,
445                 const BYTE* const ip, const BYTE* const iend,
446                 U32 const target,
447                 U32 const mls, const int extDict)
448 {
449     const ZSTD_compressionParameters* const cParams = &ms->cParams;
450     U32*   const hashTable = ms->hashTable;
451     U32    const hashLog = cParams->hashLog;
452     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
453     U32*   const bt = ms->chainTable;
454     U32    const btLog  = cParams->chainLog - 1;
455     U32    const btMask = (1 << btLog) - 1;
456     U32 matchIndex = hashTable[h];
457     size_t commonLengthSmaller=0, commonLengthLarger=0;
458     const BYTE* const base = ms->window.base;
459     const BYTE* const dictBase = ms->window.dictBase;
460     const U32 dictLimit = ms->window.dictLimit;
461     const BYTE* const dictEnd = dictBase + dictLimit;
462     const BYTE* const prefixStart = base + dictLimit;
463     const BYTE* match;
464     const U32 curr = (U32)(ip-base);
465     const U32 btLow = btMask >= curr ? 0 : curr - btMask;
466     U32* smallerPtr = bt + 2*(curr&btMask);
467     U32* largerPtr  = smallerPtr + 1;
468     U32 dummy32;   /* to be nullified at the end */
469     /* windowLow is based on target because
470      * we only need positions that will be in the window at the end of the tree update.
471      */
472     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
473     U32 matchEndIdx = curr+8+1;
474     size_t bestLength = 8;
475     U32 nbCompares = 1U << cParams->searchLog;
476 #ifdef ZSTD_C_PREDICT
477     U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
478     U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
479     predictedSmall += (predictedSmall>0);
480     predictedLarge += (predictedLarge>0);
481 #endif /* ZSTD_C_PREDICT */
482 
483     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
484 
485     assert(curr <= target);
486     assert(ip <= iend-8);   /* required for h calculation */
487     hashTable[h] = curr;   /* Update Hash Table */
488 
489     assert(windowLow > 0);
490     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
491         U32* const nextPtr = bt + 2*(matchIndex & btMask);
492         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
493         assert(matchIndex < curr);
494 
495 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog is small (<= 11) */
496         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
497         if (matchIndex == predictedSmall) {
498             /* no need to check length, result known */
499             *smallerPtr = matchIndex;
500             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
501             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
502             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
503             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
504             continue;
505         }
506         if (matchIndex == predictedLarge) {
507             *largerPtr = matchIndex;
508             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
509             largerPtr = nextPtr;
510             matchIndex = nextPtr[0];
511             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
512             continue;
513         }
514 #endif
515 
516         if (!extDict || (matchIndex+matchLength >= dictLimit)) {
517             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
518             match = base + matchIndex;
519             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
520         } else {
521             match = dictBase + matchIndex;
522             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
523             if (matchIndex+matchLength >= dictLimit)
524                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
525         }
526 
527         if (matchLength > bestLength) {
528             bestLength = matchLength;
529             if (matchLength > matchEndIdx - matchIndex)
530                 matchEndIdx = matchIndex + (U32)matchLength;
531         }
532 
533         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
534         break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
535         }
536 
537         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
538             /* match is smaller than current */
539             *smallerPtr = matchIndex;             /* update smaller idx */
540             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
541             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
542             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
543             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
544         } else {
545             /* match is larger than current */
546             *largerPtr = matchIndex;
547             commonLengthLarger = matchLength;
548             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
549             largerPtr = nextPtr;
550             matchIndex = nextPtr[0];
551     }   }
552 
553     *smallerPtr = *largerPtr = 0;
554     {   U32 positions = 0;
555         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
556         assert(matchEndIdx > curr + 8);
557         return MAX(positions, matchEndIdx - (curr + 8));
558     }
559 }
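/* Editorial note on the return value: ZSTD_updateTree_internal() advances by
 * the returned count, so returning more than 1 deliberately skips insertions
 * inside long repetitive regions (matches beyond 384 bytes, or positions
 * already referenced up to matchEndIdx - 8), where extra nodes would cost
 * time for little additional match coverage. */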
560 
561 FORCE_INLINE_TEMPLATE
562 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
563 void ZSTD_updateTree_internal(
564                 ZSTD_MatchState_t* ms,
565                 const BYTE* const ip, const BYTE* const iend,
566                 const U32 mls, const ZSTD_dictMode_e dictMode)
567 {
568     const BYTE* const base = ms->window.base;
569     U32 const target = (U32)(ip - base);
570     U32 idx = ms->nextToUpdate;
571     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
572                 idx, target, dictMode);
573 
574     while(idx < target) {
575         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
576         assert(idx < (U32)(idx + forward));
577         idx += forward;
578     }
579     assert((size_t)(ip - base) <= (size_t)(U32)(-1));
580     assert((size_t)(iend - base) <= (size_t)(U32)(-1));
581     ms->nextToUpdate = target;
582 }
583 
584 void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
585     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
586 }
587 
588 FORCE_INLINE_TEMPLATE
589 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
590 U32
591 ZSTD_insertBtAndGetAllMatches (
592                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
593                 ZSTD_MatchState_t* ms,
594                 U32* nextToUpdate3,
595                 const BYTE* const ip, const BYTE* const iLimit,
596                 const ZSTD_dictMode_e dictMode,
597                 const U32 rep[ZSTD_REP_NUM],
598                 const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
599                 const U32 lengthToBeat,
600                 const U32 mls /* template */)
601 {
602     const ZSTD_compressionParameters* const cParams = &ms->cParams;
603     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
604     const BYTE* const base = ms->window.base;
605     U32 const curr = (U32)(ip-base);
606     U32 const hashLog = cParams->hashLog;
607     U32 const minMatch = (mls==3) ? 3 : 4;
608     U32* const hashTable = ms->hashTable;
609     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
610     U32 matchIndex  = hashTable[h];
611     U32* const bt   = ms->chainTable;
612     U32 const btLog = cParams->chainLog - 1;
613     U32 const btMask= (1U << btLog) - 1;
614     size_t commonLengthSmaller=0, commonLengthLarger=0;
615     const BYTE* const dictBase = ms->window.dictBase;
616     U32 const dictLimit = ms->window.dictLimit;
617     const BYTE* const dictEnd = dictBase + dictLimit;
618     const BYTE* const prefixStart = base + dictLimit;
619     U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
620     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
621     U32 const matchLow = windowLow ? windowLow : 1;
622     U32* smallerPtr = bt + 2*(curr&btMask);
623     U32* largerPtr  = bt + 2*(curr&btMask) + 1;
624     U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
625     U32 dummy32;   /* to be nullified at the end */
626     U32 mnum = 0;
627     U32 nbCompares = 1U << cParams->searchLog;
628 
629     const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
630     const ZSTD_compressionParameters* const dmsCParams =
631                                       dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
632     const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
633     const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
634     U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
635     U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
636     U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
637     U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
638     U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
639     U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
640     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
641 
642     size_t bestLength = lengthToBeat-1;
643     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
644 
645     /* check repCode */
646     assert(ll0 <= 1);   /* necessarily 1 or 0 */
647     {   U32 const lastR = ZSTD_REP_NUM + ll0;
648         U32 repCode;
649         for (repCode = ll0; repCode < lastR; repCode++) {
650             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
651             U32 const repIndex = curr - repOffset;
652             U32 repLen = 0;
653             assert(curr >= dictLimit);
654             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
655                 /* We must validate the repcode offset because when we're using a dictionary the
656                  * valid offset range shrinks when the dictionary goes out of bounds.
657                  */
658                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
659                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
660                 }
661             } else {  /* repIndex < dictLimit || repIndex >= curr */
662                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
663                                              dmsBase + repIndex - dmsIndexDelta :
664                                              dictBase + repIndex;
665                 assert(curr >= windowLow);
666                 if ( dictMode == ZSTD_extDict
667                   && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
668                      & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
669                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
670                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
671                 }
672                 if (dictMode == ZSTD_dictMatchState
673                   && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
674                      & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
675                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
676                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
677             }   }
678             /* save longer solution */
679             if (repLen > bestLength) {
680                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
681                             repCode, ll0, repOffset, repLen);
682                 bestLength = repLen;
683                 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
684                 matches[mnum].len = (U32)repLen;
685                 mnum++;
686                 if ( (repLen > sufficient_len)
687                    | (ip+repLen == iLimit) ) {  /* best possible */
688                     return mnum;
689     }   }   }   }
690 
691     /* HC3 match finder */
692     if ((mls == 3) /*static*/ && (bestLength < mls)) {
693         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
694         if ((matchIndex3 >= matchLow)
695           & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
696             size_t mlen;
697             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
698                 const BYTE* const match = base + matchIndex3;
699                 mlen = ZSTD_count(ip, match, iLimit);
700             } else {
701                 const BYTE* const match = dictBase + matchIndex3;
702                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
703             }
704 
705             /* save best solution */
706             if (mlen >= mls /* == 3 > bestLength */) {
707                 DEBUGLOG(8, "found small match with hlog3, of length %u",
708                             (U32)mlen);
709                 bestLength = mlen;
710                 assert(curr > matchIndex3);
711                 assert(mnum==0);  /* no prior solution */
712                 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
713                 matches[0].len = (U32)mlen;
714                 mnum = 1;
715                 if ( (mlen > sufficient_len) |
716                      (ip+mlen == iLimit) ) {  /* best possible length */
717                     ms->nextToUpdate = curr+1;  /* skip insertion */
718                     return 1;
719         }   }   }
720         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
721     }  /* if (mls == 3) */
722 
723     hashTable[h] = curr;   /* Update Hash Table */
724 
725     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
726         U32* const nextPtr = bt + 2*(matchIndex & btMask);
727         const BYTE* match;
728         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
729         assert(curr > matchIndex);
730 
731         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
732             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
733             match = base + matchIndex;
734             if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
735             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
736         } else {
737             match = dictBase + matchIndex;
738             assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
739             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
740             if (matchIndex+matchLength >= dictLimit)
741                 match = base + matchIndex;   /* prepare for match[matchLength] read */
742         }
743 
744         if (matchLength > bestLength) {
745             DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
746                     (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
747             assert(matchEndIdx > matchIndex);
748             if (matchLength > matchEndIdx - matchIndex)
749                 matchEndIdx = matchIndex + (U32)matchLength;
750             bestLength = matchLength;
751             matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
752             matches[mnum].len = (U32)matchLength;
753             mnum++;
754             if ( (matchLength > ZSTD_OPT_NUM)
755                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
756                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
757                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
758         }   }
759 
760         if (match[matchLength] < ip[matchLength]) {
761             /* match smaller than current */
762             *smallerPtr = matchIndex;             /* update smaller idx */
763             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
764             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
765             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
766             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
767         } else {
768             *largerPtr = matchIndex;
769             commonLengthLarger = matchLength;
770             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
771             largerPtr = nextPtr;
772             matchIndex = nextPtr[0];
773     }   }
774 
775     *smallerPtr = *largerPtr = 0;
776 
777     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
778     if (dictMode == ZSTD_dictMatchState && nbCompares) {
779         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
780         U32 dictMatchIndex = dms->hashTable[dmsH];
781         const U32* const dmsBt = dms->chainTable;
782         commonLengthSmaller = commonLengthLarger = 0;
783         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
784             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
785             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
786             const BYTE* match = dmsBase + dictMatchIndex;
787             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
788             if (dictMatchIndex+matchLength >= dmsHighLimit)
789                 match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
790 
791             if (matchLength > bestLength) {
792                 matchIndex = dictMatchIndex + dmsIndexDelta;
793                 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
794                         (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
795                 if (matchLength > matchEndIdx - matchIndex)
796                     matchEndIdx = matchIndex + (U32)matchLength;
797                 bestLength = matchLength;
798                 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
799                 matches[mnum].len = (U32)matchLength;
800                 mnum++;
801                 if ( (matchLength > ZSTD_OPT_NUM)
802                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
803                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
804             }   }
805 
806             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
807             if (match[matchLength] < ip[matchLength]) {
808                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
809                 dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
810             } else {
811                 /* match is larger than current */
812                 commonLengthLarger = matchLength;
813                 dictMatchIndex = nextPtr[0];
814     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
815 
816     assert(matchEndIdx > curr+8);
817     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
818     return mnum;
819 }
820 
821 typedef U32 (*ZSTD_getAllMatchesFn)(
822     ZSTD_match_t*,
823     ZSTD_MatchState_t*,
824     U32*,
825     const BYTE*,
826     const BYTE*,
827     const U32 rep[ZSTD_REP_NUM],
828     U32 const ll0,
829     U32 const lengthToBeat);
830 
831 FORCE_INLINE_TEMPLATE
832 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
833 U32 ZSTD_btGetAllMatches_internal(
834         ZSTD_match_t* matches,
835         ZSTD_MatchState_t* ms,
836         U32* nextToUpdate3,
837         const BYTE* ip,
838         const BYTE* const iHighLimit,
839         const U32 rep[ZSTD_REP_NUM],
840         U32 const ll0,
841         U32 const lengthToBeat,
842         const ZSTD_dictMode_e dictMode,
843         const U32 mls)
844 {
845     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
846     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
847     if (ip < ms->window.base + ms->nextToUpdate)
848         return 0;   /* skipped area */
849     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
850     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
851 }
852 
853 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
854 
855 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
856     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
857             ZSTD_match_t* matches,                             \
858             ZSTD_MatchState_t* ms,                             \
859             U32* nextToUpdate3,                                \
860             const BYTE* ip,                                    \
861             const BYTE* const iHighLimit,                      \
862             const U32 rep[ZSTD_REP_NUM],                       \
863             U32 const ll0,                                     \
864             U32 const lengthToBeat)                            \
865     {                                                          \
866         return ZSTD_btGetAllMatches_internal(                  \
867                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
868                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
869     }
870 
871 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
872     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
873     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
874     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
875     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
876 
877 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
878 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
879 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
880 
881 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
882     {                                            \
883         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
884         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
885         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
886         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
887     }
888 
889 static ZSTD_getAllMatchesFn
890 ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
891 {
892     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
893         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
894         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
895         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
896     };
897     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
898     assert((U32)dictMode < 3);
899     assert(mls - 3 < 4);
900     return getAllMatchesFns[(int)dictMode][mls - 3];
901 }
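/* Editorial note: the GEN_ZSTD_BT_GET_ALL_MATCHES* macros above stamp out 12
 * specializations (3 dict modes x mls in 3..6) so that dictMode and mls are
 * compile-time constants inside ZSTD_insertBtAndGetAllMatches(), letting the
 * compiler prune untaken branches; this selector then pays a single indirect
 * call instead of per-position branching. */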
902 
903 /* ***********************
904 *  LDM helper functions  *
905 *************************/
906 
907 /* Struct containing info needed to make decision about ldm inclusion */
908 typedef struct {
909     RawSeqStore_t seqStore;   /* External match candidates store for this block */
910     U32 startPosInBlock;      /* Start position of the current match candidate */
911     U32 endPosInBlock;        /* End position of the current match candidate */
912     U32 offset;               /* Offset of the match candidate */
913 } ZSTD_optLdm_t;
914 
915 /* ZSTD_optLdm_skipRawSeqStoreBytes():
916  * Moves forward in @rawSeqStore by @nbBytes,
917  * which will update the fields 'pos' and 'posInSequence'.
918  */
919 static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
920 {
921     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
922     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
923         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
924         if (currPos >= currSeq.litLength + currSeq.matchLength) {
925             currPos -= currSeq.litLength + currSeq.matchLength;
926             rawSeqStore->pos++;
927         } else {
928             rawSeqStore->posInSequence = currPos;
929             break;
930         }
931     }
932     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
933         rawSeqStore->posInSequence = 0;
934     }
935 }
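/* Worked example (editorial note): given one raw sequence
 * { litLength=5, matchLength=10 } (15 bytes) and posInSequence == 0:
 *   skip  8 bytes ->  8 < 15 : posInSequence becomes 8, pos unchanged;
 *   skip 20 bytes -> 20 >= 15 : pos advances past this sequence and the
 *   remaining 5 bytes carry into the next one (or posInSequence resets
 *   to 0 if the store is exhausted). */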
936 
937 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
938  * Calculates the beginning and end of the next match in the current block.
939  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
940  */
941 static void
942 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
943                                        U32 blockBytesRemaining)
944 {
945     rawSeq currSeq;
946     U32 currBlockEndPos;
947     U32 literalsBytesRemaining;
948     U32 matchBytesRemaining;
949 
950     /* Setting match end position to MAX to ensure we never use an LDM during this block */
951     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
952         optLdm->startPosInBlock = UINT_MAX;
953         optLdm->endPosInBlock = UINT_MAX;
954         return;
955     }
956     /* Calculate appropriate bytes left in matchLength and litLength
957      * after adjusting based on ldmSeqStore->posInSequence */
958     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
959     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
960     currBlockEndPos = currPosInBlock + blockBytesRemaining;
961     literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
962             currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
963             0;
964     matchBytesRemaining = (literalsBytesRemaining == 0) ?
965             currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
966             currSeq.matchLength;
967 
968     /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
969     if (literalsBytesRemaining >= blockBytesRemaining) {
970         optLdm->startPosInBlock = UINT_MAX;
971         optLdm->endPosInBlock = UINT_MAX;
972         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
973         return;
974     }
975 
976     /* This process may leave matches shorter than minMatch. In that case, we will
977        reject them later, when deciding whether or not to add the ldm */
978     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
979     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
980     optLdm->offset = currSeq.offset;
981 
982     if (optLdm->endPosInBlock > currBlockEndPos) {
983         /* Match ends after the block ends, we can't use the whole match */
984         optLdm->endPosInBlock = currBlockEndPos;
985         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
986     } else {
987         /* Consume nb of bytes equal to size of sequence left */
988         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
989     }
990 }
991 
992 /* ZSTD_optLdm_maybeAddMatch():
993  * Adds a match into 'matches' if it's long enough,
994  * based on its 'startPosInBlock' and 'endPosInBlock'.
995  * Maintains the correct ordering of 'matches'.
996  */
997 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
998                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
999                                       U32 minMatch)
1000 {
1001     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1002     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1003     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1004 
1005     /* Ensure that current block position is not outside of the match */
1006     if (currPosInBlock < optLdm->startPosInBlock
1007       || currPosInBlock >= optLdm->endPosInBlock
1008       || candidateMatchLength < minMatch) {
1009         return;
1010     }
1011 
1012     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1013         U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1014         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1015                  candidateOffBase, candidateMatchLength, currPosInBlock);
1016         matches[*nbMatches].len = candidateMatchLength;
1017         matches[*nbMatches].off = candidateOffBase;
1018         (*nbMatches)++;
1019     }
1020 }
1021 
1022 /* ZSTD_optLdm_processMatchCandidate():
1023  * Wrapper function to update ldm seq store and call ldm functions as necessary.
1024  */
1025 static void
1026 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1027                                   ZSTD_match_t* matches, U32* nbMatches,
1028                                   U32 currPosInBlock, U32 remainingBytes,
1029                                   U32 minMatch)
1030 {
1031     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1032         return;
1033     }
1034 
1035     if (currPosInBlock >= optLdm->endPosInBlock) {
1036         if (currPosInBlock > optLdm->endPosInBlock) {
1037             /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1038              * at the end of a match from the ldm seq store, and will often be some bytes
1039              * beyond endPosInBlock. As such, we need to correct for these "overshoots"
1040              */
1041             U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1042             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1043         }
1044         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1045     }
1046     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1047 }
1048 
1049 
1050 /*-*******************************
1051 *  Optimal parser
1052 *********************************/
1053 
1054 #if 0 /* debug */
1055 
1056 static void
1057 listStats(const U32* table, int lastEltID)
1058 {
1059     int const nbElts = lastEltID + 1;
1060     int enb;
1061     for (enb=0; enb < nbElts; enb++) {
1062         (void)table;
1063         /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1064         RAWLOG(2, "%4i,", table[enb]);
1065     }
1066     RAWLOG(2, " \n");
1067 }
1068 
1069 #endif
1070 
1071 #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1072 #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1073 #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1074 
1075 FORCE_INLINE_TEMPLATE
1076 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1077 size_t
1078 ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1079                                SeqStore_t* seqStore,
1080                                U32 rep[ZSTD_REP_NUM],
1081                          const void* src, size_t srcSize,
1082                          const int optLevel,
1083                          const ZSTD_dictMode_e dictMode)
1084 {
1085     optState_t* const optStatePtr = &ms->opt;
1086     const BYTE* const istart = (const BYTE*)src;
1087     const BYTE* ip = istart;
1088     const BYTE* anchor = istart;
1089     const BYTE* const iend = istart + srcSize;
1090     const BYTE* const ilimit = iend - 8;
1091     const BYTE* const base = ms->window.base;
1092     const BYTE* const prefixStart = base + ms->window.dictLimit;
1093     const ZSTD_compressionParameters* const cParams = &ms->cParams;
1094 
1095     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1096 
1097     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1098     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1099     U32 nextToUpdate3 = ms->nextToUpdate;
1100 
1101     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1102     ZSTD_match_t* const matches = optStatePtr->matchTable;
1103     ZSTD_optimal_t lastStretch;
1104     ZSTD_optLdm_t optLdm;
1105 
1106     ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1107 
1108     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1109     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1110     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1111 
1112     /* init */
1113     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1114                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1115     assert(optLevel <= 2);
1116     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1117     ip += (ip==prefixStart);
1118 
1119     /* Match Loop */
1120     while (ip < ilimit) {
1121         U32 cur, last_pos = 0;
1122 
1123         /* find first match */
1124         {   U32 const litlen = (U32)(ip - anchor);
1125             U32 const ll0 = !litlen;
1126             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1127             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1128                                               (U32)(ip-istart), (U32)(iend-ip),
1129                                               minMatch);
1130             if (!nbMatches) {
1131                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1132                 ip++;
1133                 continue;
1134             }
1135 
1136             /* Match found: let's store this solution, and possibly find more candidates.
1137              * During this forward pass, @opt is used to store stretches,
1138              * defined as "a match followed by N literals".
1139              * Note how this is different from a Sequence, which is "N literals followed by a match".
1140              * Storing stretches allows us to store different match predecessors
1141              * for each literal position that is part of a literals run. */
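            /* Editor's illustration (not part of the original source):
             * if the block contains ...MMMM LLL MMMM..., the forward pass
             * records the stretch "MMMM LLL" (match + trailing literals) at
             * the rPos where the literals end, whereas the seqStore will
             * ultimately receive the Sequence "LLL MMMM" (literals + match).
             * The reverse traversal near the end of this function performs
             * that stretch-to-sequence conversion. */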
1142 
1143             /* initialize opt[0] */
1144             opt[0].mlen = 0;  /* there are only literals so far */
1145             opt[0].litlen = litlen;
1146             /* No need to include the actual price of the literals before the first match
1147              * because it is static for the duration of the forward pass, and is included
1148              * in every subsequent price. We do, however, include the literal length,
1149              * because the marginal cost of extending a literal run depends on the current value of litlen.
1150              */
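            /* Editor's note (not part of the original source): concretely,
             * with litlen==5, opt[0].price carries only LL_PRICE(5); the raw
             * cost of those 5 literals is a constant shared by every path
             * explored in this forward pass, so omitting it cannot change
             * which path ends up cheapest. */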
1151             opt[0].price = LL_PRICE(litlen);
1152             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1153             ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1154 
1155             /* large match -> immediate encoding */
1156             {   U32 const maxML = matches[nbMatches-1].len;
1157                 U32 const maxOffBase = matches[nbMatches-1].off;
1158                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1159                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1160 
1161                 if (maxML > sufficient_len) {
1162                     lastStretch.litlen = 0;
1163                     lastStretch.mlen = maxML;
1164                     lastStretch.off = maxOffBase;
1165                     DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1166                                 maxML, sufficient_len);
1167                     cur = 0;
1168                     last_pos = maxML;
1169                     goto _shortestPath;
1170             }   }
1171 
1172             /* set prices for the matches found at starting position (rPos == 0) */
1173             assert(opt[0].price >= 0);
1174             {   U32 pos;
1175                 U32 matchNb;
1176                 for (pos = 1; pos < minMatch; pos++) {
1177                     opt[pos].price = ZSTD_MAX_PRICE;
1178                     opt[pos].mlen = 0;
1179                     opt[pos].litlen = litlen + pos;
1180                 }
1181                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1182                     U32 const offBase = matches[matchNb].off;
1183                     U32 const end = matches[matchNb].len;
1184                     for ( ; pos <= end ; pos++ ) {
1185                         int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1186                         int const sequencePrice = opt[0].price + matchPrice;
1187                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1188                                     pos, ZSTD_fCost(sequencePrice));
1189                         opt[pos].mlen = pos;
1190                         opt[pos].off = offBase;
1191                         opt[pos].litlen = 0; /* end of match */
1192                         opt[pos].price = sequencePrice + LL_PRICE(0);
1193                     }
1194                 }
1195                 last_pos = pos-1;
1196                 opt[pos].price = ZSTD_MAX_PRICE;
1197             }
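            /* Editor's note (not part of the original source): @matches is
             * ordered by increasing length, so the loop above prices each
             * rPos in [minMatch..maxML] using the first match long enough to
             * reach it, and positions below minMatch are initialized to
             * ZSTD_MAX_PRICE so that any later candidate beats them. */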
1198         }
1199 
1200         /* check further positions */
1201         for (cur = 1; cur <= last_pos; cur++) {
1202             const BYTE* const inr = ip + cur;
1203             assert(cur <= ZSTD_OPT_NUM);
1204             DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1205 
1206             /* Fix current position with one literal if cheaper */
1207             {   U32 const litlen = opt[cur-1].litlen + 1;
1208                 int const price = opt[cur-1].price
1209                                 + LIT_PRICE(ip+cur-1)
1210                                 + LL_INCPRICE(litlen);
1211                 assert(price < 1000000000); /* overflow check */
1212                 if (price <= opt[cur].price) {
1213                     ZSTD_optimal_t const prevMatch = opt[cur];
1214                     DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1215                                 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1216                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1217                     opt[cur] = opt[cur-1];
1218                     opt[cur].litlen = litlen;
1219                     opt[cur].price = price;
1220                     if ( (optLevel >= 1) /* additional check only for higher modes */
1221                       && (prevMatch.litlen == 0) /* replace a match */
1222                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1223                       && LIKELY(ip + cur < iend)
1224                     ) {
1225                         /* check next position, in case it would be cheaper */
1226                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1227                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1228                         DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1229                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1230                         if ( (with1literal < withMoreLiterals)
1231                           && (with1literal < opt[cur+1].price) ) {
1232                             /* update offset history - before it disappears */
1233                             U32 const prev = cur - prevMatch.mlen;
1234                             Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1235                             assert(cur >= prevMatch.mlen);
1236                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1237                                         ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1238                                         newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1239                             opt[cur+1] = prevMatch;  /* mlen & offbase */
1240                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1241                             opt[cur+1].litlen = 1;
1242                             opt[cur+1].price = with1literal;
1243                             if (last_pos < cur+1) last_pos = cur+1;
1244                         }
1245                     }
1246                 } else {
1247                     DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1248                                 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1249                 }
1250             }
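            /* Editor's note (not part of the original source): the block above
             * is the literal-extension relaxation of the dynamic program,
             * in essence:
             *   opt[cur].price = min(opt[cur].price,
             *                        opt[cur-1].price + LIT_PRICE(ip+cur-1)
             *                                         + LL_INCPRICE(litlen));
             * with an extra look-ahead at cur+1 when a match end is replaced,
             * since a litlen of 1 can price lower than a litlen of 0. */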
1251 
1252             /* Offset history is not updated during match comparison.
1253              * Do it here, now that the match is selected and confirmed.
1254              */
1255             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1256             assert(cur >= opt[cur].mlen);
1257             if (opt[cur].litlen == 0) {
1258                 /* just finished a match => alter offset history */
1259                 U32 const prev = cur - opt[cur].mlen;
1260                 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1261                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1262             }
1263 
1264             /* last match must start at a minimum distance of 8 from iend */
1265             if (inr > ilimit) continue;
1266 
1267             if (cur == last_pos) break;
1268 
1269             if ( (optLevel==0) /*static_test*/
1270               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1271                 DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1272                 continue;  /* skip unpromising positions; ~+6% speed, -0.01 ratio */
1273             }
1274 
1275             assert(opt[cur].price >= 0);
1276             {   U32 const ll0 = (opt[cur].litlen == 0);
1277                 int const previousPrice = opt[cur].price;
1278                 int const basePrice = previousPrice + LL_PRICE(0);
1279                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1280                 U32 matchNb;
1281 
1282                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1283                                                   (U32)(inr-istart), (U32)(iend-inr),
1284                                                   minMatch);
1285 
1286                 if (!nbMatches) {
1287                     DEBUGLOG(7, "rPos:%u : no match found", cur);
1288                     continue;
1289                 }
1290 
1291                 {   U32 const longestML = matches[nbMatches-1].len;
1292                     DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1293                                 (int)(inr-istart), cur, nbMatches, longestML);
1294 
1295                     if ( (longestML > sufficient_len)
1296                       || (cur + longestML >= ZSTD_OPT_NUM)
1297                       || (ip + cur + longestML >= iend) ) {
1298                         lastStretch.mlen = longestML;
1299                         lastStretch.off = matches[nbMatches-1].off;
1300                         lastStretch.litlen = 0;
1301                         last_pos = cur + longestML;
1302                         goto _shortestPath;
1303                 }   }
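                /* Editor's note (not part of the original source): any of the
                 * three conditions above ends the forward pass early: the
                 * match already satisfies @sufficient_len, or it would run
                 * past the price table (ZSTD_OPT_NUM), or it would run past
                 * the end of the input. */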
1304 
1305                 /* set prices using matches found at position == cur */
1306                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1307                     U32 const offset = matches[matchNb].off;
1308                     U32 const lastML = matches[matchNb].len;
1309                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1310                     U32 mlen;
1311 
1312                     DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1313                                 matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1314 
1315                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1316                         U32 const pos = cur + mlen;
1317                         int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1318 
1319                         if ((pos > last_pos) || (price < opt[pos].price)) {
1320                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1321                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1322                             while (last_pos < pos) {
1323                                 /* fill empty positions, for future comparisons */
1324                                 last_pos++;
1325                                 opt[last_pos].price = ZSTD_MAX_PRICE;
1326                                 opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1327                             }
1328                             opt[pos].mlen = mlen;
1329                             opt[pos].off = offset;
1330                             opt[pos].litlen = 0;
1331                             opt[pos].price = price;
1332                         } else {
1333                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1334                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1335                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1336                         }
1337             }   }   }
1338             opt[last_pos+1].price = ZSTD_MAX_PRICE;
1339         }  /* for (cur = 1; cur <= last_pos; cur++) */
1340 
1341         lastStretch = opt[last_pos];
1342         assert(cur >= lastStretch.mlen);
1343         cur = last_pos - lastStretch.mlen;
1344 
1345 _shortestPath:   /* cur, last_pos and lastStretch must be set */
1346         assert(opt[0].mlen == 0);
1347         assert(last_pos >= lastStretch.mlen);
1348         assert(cur == last_pos - lastStretch.mlen);
1349 
1350         if (lastStretch.mlen==0) {
1351             /* no solution : all matches have been converted into literals */
1352             assert(lastStretch.litlen == (ip - anchor) + last_pos);
1353             ip += last_pos;
1354             continue;
1355         }
1356         assert(lastStretch.off > 0);
1357 
1358         /* Update offset history */
1359         if (lastStretch.litlen == 0) {
1360             /* finishing on a match : update offset history */
1361             Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1362             ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1363         } else {
1364             ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1365             assert(cur >= lastStretch.litlen);
1366             cur -= lastStretch.litlen;
1367         }
1368 
1369         /* Let's write the shortest path solution.
1370          * It is stored in @opt in reverse order,
1371          * starting from @storeEnd (==cur+2),
1372          * effectively overwriting part of @opt.
1373          * Content is changed too:
1374          * - So far, @opt stored stretches, aka a match followed by literals
1375          * - Now, it will store sequences, aka literals followed by a match
1376          */
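        /* Editor's sketch (not part of the original source): if cur==7 and the
         * chain of stretches back-linked through (litlen + mlen) is
         * S2 <- S1 <- lastStretch, the loop below writes, starting from
         * storeEnd == cur+2 and moving down:
         *   opt[storeEnd]     = lastStretch
         *   opt[storeEnd - 1] = S1
         *   opt[storeEnd - 2] = S2
         * fixing each litlen from the stretch preceding it, so that
         * [storeStart..storeEnd] then reads forward as a list of Sequences. */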
1377         {   U32 const storeEnd = cur + 2;
1378             U32 storeStart = storeEnd;
1379             U32 stretchPos = cur;
1380 
1381             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1382                         last_pos, cur); (void)last_pos;
1383             assert(storeEnd < ZSTD_OPT_SIZE);
1384             DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1385                         storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1386             if (lastStretch.litlen > 0) {
1387                 /* last "sequence" is unfinished: just a bunch of literals */
1388                 opt[storeEnd].litlen = lastStretch.litlen;
1389                 opt[storeEnd].mlen = 0;
1390                 storeStart = storeEnd-1;
1391                 opt[storeStart] = lastStretch;
1392             } else {
1393                 opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1394                 storeStart = storeEnd;
1395             }
1396             while (1) {
1397                 ZSTD_optimal_t nextStretch = opt[stretchPos];
1398                 opt[storeStart].litlen = nextStretch.litlen;
1399                 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1400                             opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1401                 if (nextStretch.mlen == 0) {
1402                     /* reaching beginning of segment */
1403                     break;
1404                 }
1405                 storeStart--;
1406                 opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1407                 assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1408                 stretchPos -= nextStretch.litlen + nextStretch.mlen;
1409             }
1410 
1411             /* save sequences */
1412             DEBUGLOG(6, "sending selected sequences into seqStore");
1413             {   U32 storePos;
1414                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1415                     U32 const llen = opt[storePos].litlen;
1416                     U32 const mlen = opt[storePos].mlen;
1417                     U32 const offBase = opt[storePos].off;
1418                     U32 const advance = llen + mlen;
1419                     DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1420                                 (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1421 
1422                     if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1423                         assert(storePos == storeEnd);   /* must be last sequence */
1424                         ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1425                         continue;   /* will finish */
1426                     }
1427 
1428                     assert(anchor + llen <= iend);
1429                     ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1430                     ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1431                     anchor += advance;
1432                     ip = anchor;
1433             }   }
1434             DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1435 
1436             /* update all costs */
1437             ZSTD_setBasePrices(optStatePtr, optLevel);
1438         }
1439     }   /* while (ip < ilimit) */
1440 
1441     /* Return the last literals size */
1442     return (size_t)(iend - anchor);
1443 }
1444 #endif /* build exclusions */
1445 
1446 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1447 static size_t ZSTD_compressBlock_opt0(
1448         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1449         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1450 {
1451     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1452 }
1453 #endif
1454 
1455 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1456 static size_t ZSTD_compressBlock_opt2(
1457         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1458         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1459 {
1460     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1461 }
1462 #endif
1463 
1464 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1465 size_t ZSTD_compressBlock_btopt(
1466         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1467         const void* src, size_t srcSize)
1468 {
1469     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1470     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1471 }
1472 #endif
1473 
1474 
1475 
1476 
1477 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1478 /* ZSTD_initStats_ultra():
1479  * make a first compression pass, just to seed stats with more accurate starting values.
1480  * only works on first block, with no dictionary and no ldm.
1481  * this function cannot error out, its narrow contract must be respected.
1482  */
1483 static
1484 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1485 void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1486                           SeqStore_t* seqStore,
1487                           U32 rep[ZSTD_REP_NUM],
1488                     const void* src, size_t srcSize)
1489 {
1490     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1491     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1492 
1493     DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1494     assert(ms->opt.litLengthSum == 0);    /* first block */
1495     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1496     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1497     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1498 
1499     ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */
1500 
1501     /* invalidate first scan from history, only keep entropy stats */
1502     ZSTD_resetSeqStore(seqStore);
1503     ms->window.base -= srcSize;
1504     ms->window.dictLimit += (U32)srcSize;
1505     ms->window.lowLimit = ms->window.dictLimit;
1506     ms->nextToUpdate = ms->window.dictLimit;
1507 
1508 }
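/* Editor's note (not part of the original source): the window adjustment above
 * rewinds @base by srcSize while raising dictLimit and lowLimit to match, so
 * that every position indexed during the first pass now falls outside the
 * valid window. The match-finder history is thereby invalidated, while the
 * entropy statistics accumulated in ms->opt are kept for the real pass. */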
1509 
1510 size_t ZSTD_compressBlock_btultra(
1511         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1512         const void* src, size_t srcSize)
1513 {
1514     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1515     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1516 }
1517 
1518 size_t ZSTD_compressBlock_btultra2(
1519         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1520         const void* src, size_t srcSize)
1521 {
1522     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1523     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1524 
1525     /* 2-pass strategy:
1526      * make a first pass over the first block to collect statistics,
1527      * in order to seed the second pass with more accurate ones.
1528      * After the 1st pass, the function forgets the history and starts the block anew.
1529      * Consequently, this can only work if no data has been previously loaded into the tables:
1530      * no dictionary, no prefix, no ldm preprocessing.
1531      * The compression ratio gain is generally small (~0.5% on the first block),
1532      * at a cost of 2x cpu time on the first block. */
1533     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1534     if ( (ms->opt.litLengthSum==0)   /* first block */
1535       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1536       && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1537       && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
1538       && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1539       ) {
1540         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1541     }
1542 
1543     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1544 }
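/* Editor's sketch (hypothetical helper, not part of the original source): the
 * five eligibility conditions tested in ZSTD_compressBlock_btultra2() can be
 * read as a single "virgin state" predicate; ZSTD_initStats_ultra() is only
 * safe to call when all of them hold. */
#if 0 /* editor's sketch */
static int ZSTD_opt_canSeedStatsWithFirstPass(const ZSTD_MatchState_t* ms,
                                              const SeqStore_t* seqStore,
                                              U32 curr, size_t srcSize)
{
    return (ms->opt.litLengthSum == 0)                       /* first block */
        && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
        && (ms->window.dictLimit == ms->window.lowLimit)     /* no dictionary */
        && (curr == ms->window.dictLimit)                    /* start of frame */
        && (srcSize > ZSTD_PREDEF_THRESHOLD);                /* enough input */
}
#endif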
1545 #endif
1546 
1547 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1548 size_t ZSTD_compressBlock_btopt_dictMatchState(
1549         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1550         const void* src, size_t srcSize)
1551 {
1552     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1553 }
1554 
1555 size_t ZSTD_compressBlock_btopt_extDict(
1556         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1557         const void* src, size_t srcSize)
1558 {
1559     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1560 }
1561 #endif
1562 
1563 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1564 size_t ZSTD_compressBlock_btultra_dictMatchState(
1565         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1566         const void* src, size_t srcSize)
1567 {
1568     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1569 }
1570 
1571 size_t ZSTD_compressBlock_btultra_extDict(
1572         ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1573         const void* src, size_t srcSize)
1574 {
1575     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1576 }
1577 #endif
1578 
1579 /* note : there is no btultra2 variant for extDict or dictMatchState,
1580  * because btultra2 is not meant to work with dictionaries
1581  * and is only relevant for the first block of a frame (no prefix) */
1582