/*
 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_opt.h"
#include "zstd_lazy.h"   /* ZSTD_updateTree, ZSTD_updateTree_extDict */


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE     (1<<30)


/*-*************************************
*  Price functions for optimal parser
***************************************/
static void ZSTD_setLog2Prices(optState_t* optPtr)
{
    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
}
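/* Illustrative note : the log2 sums computed above turn the adaptive
 * frequency counters into approximate bit costs. A symbol with count freq
 * out of sum total occurrences is priced at roughly
 * log2(sum) - log2(freq) bits, i.e. -log2(freq/sum).
 * For example, with litSum close to 4096 (log2 ~= 12) and a literal whose
 * litFreq is about 256 (log2 ~= 8), that literal costs about 4 bits. */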


static void ZSTD_rescaleFreqs(optState_t* const optPtr,
                              const BYTE* const src, size_t const srcSize)
{
    optPtr->staticPrices = 0;

    if (optPtr->litLengthSum == 0) {  /* first init */
        unsigned u;
        if (srcSize <= 1024) optPtr->staticPrices = 1;

        assert(optPtr->litFreq!=NULL);
        for (u=0; u<=MaxLit; u++)
            optPtr->litFreq[u] = 0;
        for (u=0; u<srcSize; u++)
            optPtr->litFreq[src[u]]++;
        optPtr->litSum = 0;
        for (u=0; u<=MaxLit; u++) {
            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
            optPtr->litSum += optPtr->litFreq[u];
        }

        for (u=0; u<=MaxLL; u++)
            optPtr->litLengthFreq[u] = 1;
        optPtr->litLengthSum = MaxLL+1;
        for (u=0; u<=MaxML; u++)
            optPtr->matchLengthFreq[u] = 1;
        optPtr->matchLengthSum = MaxML+1;
        for (u=0; u<=MaxOff; u++)
            optPtr->offCodeFreq[u] = 1;
        optPtr->offCodeSum = (MaxOff+1);

    } else {
        unsigned u;

        optPtr->litSum = 0;
        for (u=0; u<=MaxLit; u++) {
            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
            optPtr->litSum += optPtr->litFreq[u];
        }
        optPtr->litLengthSum = 0;
        for (u=0; u<=MaxLL; u++) {
            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
            optPtr->litLengthSum += optPtr->litLengthFreq[u];
        }
        optPtr->matchLengthSum = 0;
        for (u=0; u<=MaxML; u++) {
            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
        }
        optPtr->offCodeSum = 0;
        for (u=0; u<=MaxOff; u++) {
            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
            optPtr->offCodeSum += optPtr->offCodeFreq[u];
        }
    }

    ZSTD_setLog2Prices(optPtr);
}
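/* Note on the rescale above : when statistics already exist from a previous
 * block, each counter is shrunk towards 1 (freq = 1 + (freq >> shift)),
 * so older history decays while recently frequent symbols keep a relative
 * advantage. For example, with ZSTD_FREQ_DIV==4, a literal counted 100 times
 * restarts the next block at 1 + (100 >> 5) = 4. */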


/* ZSTD_rawLiteralsCost() :
 * cost of literals (only) in a given segment (whose length can be zero)
 * does not include cost of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr)
{
    if (optPtr->staticPrices) return (litLength*6);  /* 6 bits per literal - no statistics used */
    if (litLength == 0) return 0;

    /* literals */
    {   U32 u;
        U32 cost = litLength * optPtr->log2litSum;
        for (u=0; u < litLength; u++)
            cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
        return cost;
    }
}

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
{
    if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
        return price;
    }
}

/* ZSTD_fullLiteralsCost() :
 * cost of the literal part of a sequence,
 * including the literals themselves and the literalLength symbol */
static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
                                 const optState_t* const optPtr)
{
    return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
         + ZSTD_litLengthPrice(litLength, optPtr);
}

/* ZSTD_litLengthContribution() :
 * @return ( cost(litlength) - cost(0) )
 * this value can then be added to rawLiteralsCost()
 * to provide a cost which is directly comparable to a match ending at the same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
{
    if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        int const contribution = LL_bits[llCode]
                        + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
                        - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
#if 1
        return contribution;
#else
        return MAX(0, contribution); /* sometimes better, sometimes not ... */
#endif
    }
}
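/* Worked detail : cost(litlength) - cost(0) simplifies to the expression
 * kept above because both costs share the log2litLengthSum term and
 * LL_bits[0] == 0 :
 *   cost(ll) = LL_bits[llCode] + log2litLengthSum - highbit32(freq[llCode]+1)
 *   cost(0)  =        0        + log2litLengthSum - highbit32(freq[0]+1)
 * The difference can be negative when llCode is more frequent than code 0,
 * which is why the return type is int. */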

/* ZSTD_literalsContribution() :
 * creates a fake cost for the literals part of a sequence
 * which can be compared to the ending cost of a match
 * should a new match start at this position */
static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
                                     const optState_t* const optPtr)
{
    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
                           + ZSTD_litLengthContribution(litLength, optPtr);
    return contribution;
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
                                    U32 const offset, U32 const matchLength,
                                    const optState_t* const optPtr,
                                    int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offset+1);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->staticPrices)  /* fixed scheme, do not use statistics */
        return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;

    price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
    if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
    }

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
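/* Illustrative note : offCode is the position of the highest set bit of
 * offset+1 (offset follows the repCode convention : 0-2 for repeat offsets,
 * distance + ZSTD_REP_MOVE otherwise), so it grows by one each time the
 * distance roughly doubles. With optLevel<2, a match whose offCode is 22
 * pays an extra (22-19)*2 = 6 bits, nudging the parser toward closer matches
 * that are cheaper to decompress. */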

static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offsetCode, U32 matchLength)
{
    /* literals */
    {   U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* match offset code (0-2=>repCode; 3+=>offset+2) */
    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
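/* Illustrative note : for length==3, a full 32-bit word is read and the byte
 * that does not belong to the 3-byte prefix is shifted out (the top byte on
 * little-endian machines, the bottom byte on big-endian ones). Two such
 * values compare equal exactly when the first 3 bytes match, which is all
 * the callers need, hence "safe only for comparisons". */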


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
{
    U32* const hashTable3  = cctx->hashTable3;
    U32 const hashLog3  = cctx->hashLog3;
    const BYTE* const base = cctx->base;
    U32 idx = cctx->nextToUpdate3;
    U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                    ZSTD_CCtx* zc,
                    const BYTE* const ip, const BYTE* const iLimit, int const extDict,
                    U32 nbCompares, U32 const mls, U32 const sufficient_len,
                    U32 rep[ZSTD_REP_NUM], U32 const ll0,
                    ZSTD_match_t* matches, const U32 lengthToBeat)
{
    const BYTE* const base = zc->base;
    U32 const current = (U32)(ip-base);
    U32 const hashLog = zc->appliedParams.cParams.hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = zc->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = zc->chainTable;
    U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = zc->dictBase;
    U32 const dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = btMask >= current ? 0 : current - btMask;
    U32 const windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");

    /* check repCode */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = current - repOffset;
            U32 repLen = 0;
            assert(current >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
                if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= current */
                const BYTE* const repMatch = dictBase + repIndex;
                assert(current >= windowLow);
                if ( extDict /* this case only valid in extDict mode */
                  && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found rep-match %u of length %u",
                            repCode - ll0, (U32)repLen);
                bestLength = repLen;
                matches[mnum].off = repCode - ll0;
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
        if ((matchIndex3 > windowLow)
          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(current > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    zc->nextToUpdate = current+1;  /* skip insertion */
                    return 1;
    }   }   }   }

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;
        assert(current > matchIndex);

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u",
                    (U32)matchLength, current - matchIndex);
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if (matchLength > ZSTD_OPT_NUM) break;
            if (ip+matchLength == iLimit) {  /* equal : no way to know if smaller or larger */
                break;   /* drop, to preserve bt consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(matchEndIdx > current+8);
    zc->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}
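/* Illustrative note : the binary tree is stored flat in zc->chainTable :
 * entry 2*(index & btMask) holds the "smaller" child of position index and
 * entry 2*(index & btMask)+1 holds the "larger" child, where the ordering is
 * the lexicographic order of the data starting at those positions.
 * commonLengthSmaller / commonLengthLarger track how many leading bytes are
 * already known to match on each side, so comparisons restart past them. */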


FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
                        U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
                        U32 rep[ZSTD_REP_NUM], U32 const ll0,
                        ZSTD_match_t* matches, U32 const lengthToBeat)
{
    DEBUGLOG(7, "ZSTD_BtGetAllMatches");
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
    else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
    default :
    case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
    case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
    case 7 :
    case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
    }
}


/*-*******************************
*  Optimal parser
*********************************/
typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
    repcodes_t newReps;
    if (offset >= ZSTD_REP_NUM) {  /* full offset */
        newReps.rep[2] = rep[1];
        newReps.rep[1] = rep[0];
        newReps.rep[0] = offset - ZSTD_REP_MOVE;
    } else {   /* repcode */
        U32 const repCode = offset + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            newReps.rep[1] = rep[0];
            newReps.rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            memcpy(&newReps, rep, sizeof(newReps));
        }
    }
    return newReps;
}
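/* Worked example (assuming ll0==0) : with rep = {8, 16, 24}, an offset field
 * of 1 selects repCode 1, so currentOffset = rep[1] = 16 and the history
 * becomes {16, 8, 24} ; rep[2] survives because repCode < 2. A regular match
 * at distance 100 arrives as offset 100+ZSTD_REP_MOVE and produces
 * {100, 8, 16}, discarding the oldest repeat offset. */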


typedef struct {
    const BYTE* anchor;
    U32 litlen;
    U32 rawLitCost;
} cachedLiteralPrice_t;

static U32 ZSTD_rawLiteralsCost_cached(
                            cachedLiteralPrice_t* const cachedLitPrice,
                            const BYTE* const anchor, U32 const litlen,
                            const optState_t* const optStatePtr)
{
    U32 startCost;
    U32 remainingLength;
    const BYTE* startPosition;

    if (anchor == cachedLitPrice->anchor) {
        startCost = cachedLitPrice->rawLitCost;
        startPosition = anchor + cachedLitPrice->litlen;
        assert(litlen >= cachedLitPrice->litlen);
        remainingLength = litlen - cachedLitPrice->litlen;
    } else {
        startCost = 0;
        startPosition = anchor;
        remainingLength = litlen;
    }

    {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
        cachedLitPrice->anchor = anchor;
        cachedLitPrice->litlen = litlen;
        cachedLitPrice->rawLitCost = rawLitCost;
        return rawLitCost;
    }
}
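/* Note : while the parser scans forward from a fixed anchor, litlen only
 * increases (see the assert above), so the cache lets successive calls price
 * just the newly uncovered literals instead of re-summing the whole run from
 * the anchor each time. */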

static U32 ZSTD_fullLiteralsCost_cached(
                            cachedLiteralPrice_t* const cachedLitPrice,
                            const BYTE* const anchor, U32 const litlen,
                            const optState_t* const optStatePtr)
{
    return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
         + ZSTD_litLengthPrice(litlen, optStatePtr);
}

static int ZSTD_literalsContribution_cached(
                            cachedLiteralPrice_t* const cachedLitPrice,
                            const BYTE* const anchor, U32 const litlen,
                            const optState_t* const optStatePtr)
{
    int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
                           + ZSTD_litLengthContribution(litlen, optStatePtr);
    return contribution;
}

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize,
                                      const int optLevel, const int extDict)
{
    seqStore_t* const seqStorePtr = &(ctx->seqStore);
    optState_t* const optStatePtr = &(ctx->optState);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const BYTE* const prefixStart = base + ctx->dictLimit;

    U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
    U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
    U32 const mls = ctx->appliedParams.cParams.searchLength;
    U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    cachedLiteralPrice_t cachedLitPrice;
    U32 rep[ZSTD_REP_NUM];

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
    ip += (ip==prefixStart);
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
    memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;
        U32 best_mlen, best_off;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
            opt[0].mlen = 1;
            opt[0].litlen = litlen;

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new series",
                            nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    best_mlen = maxML;
                    best_off = matches[nbMatches-1].off;
                    DEBUGLOG(7, "large match (%u>%u), immediate encoding",
                                best_mlen, sufficient_len);
                    cur = 0;
                    last_pos = 1;
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting at position 0 */
            {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
                U32 pos;
                U32 matchNb;
                for (pos = 0; pos < minMatch; pos++) {
                    opt[pos].mlen = 1;
                    opt[pos].price = ZSTD_MAX_PRICE;
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
                        DEBUGLOG(7, "rPos:%u => set initial price : %u",
                                    pos, matchPrice);
                        opt[pos].mlen = pos;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
                        opt[pos].price = matchPrice;
                        memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                }   }
                last_pos = pos-1;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
                int price;  /* note : contribution can be negative */
                if (cur > litlen) {
                    price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
                } else {
                    price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
                }
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
                                cur, price, opt[cur].price);
                    opt[cur].mlen = 1;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
            }   }

            /* last match must start at a minimum distance of 8 from iend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

             if ( (optLevel==0) /*static*/
               && (opt[cur+1].price <= opt[cur].price) )
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */

            {   U32 const ll0 = (opt[cur].mlen != 1);
                U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
                U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
                U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
                U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
                U32 matchNb;
                if (!nbMatches) continue;

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
                                cur, nbMatches, maxML);

                    if ( (maxML > sufficient_len)
                       | (cur + maxML >= ZSTD_OPT_NUM) ) {
                        best_mlen = maxML;
                        best_off = matches[nbMatches-1].off;
                        last_pos = cur + 1;
                        goto _shortestPath;
                    }
                }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
                                matchNb, matches[matchNb].off, lastML, litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {
                        U32 const pos = cur + mlen;
                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
                                        pos, price, opt[pos].price);
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                            memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                        } else {
                            if (optLevel==0) break;  /* gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 1);

        /* reverse traversal */
        DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
                    last_pos, cur);
        {   U32 selectedMatchLength = best_mlen;
            U32 selectedOffset = best_off;
            U32 pos = cur;
            while (1) {
                U32 const mlen = opt[pos].mlen;
                U32 const off = opt[pos].off;
                opt[pos].mlen = selectedMatchLength;
                opt[pos].off = selectedOffset;
                selectedMatchLength = mlen;
                selectedOffset = off;
                if (mlen > pos) break;
                pos -= mlen;
        }   }

        /* save sequences */
        {   U32 pos;
            for (pos=0; pos < last_pos; ) {
                U32 const llen = (U32)(ip - anchor);
                U32 const mlen = opt[pos].mlen;
                U32 const offset = opt[pos].off;
                if (mlen == 1) { ip++; pos++; continue; }  /* literal position => move on */
                pos += mlen; ip += mlen;

                /* repcodes update : like ZSTD_updateRep(), but update in place */
                if (offset >= ZSTD_REP_NUM) {  /* full offset */
                    rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = offset - ZSTD_REP_MOVE;
                } else {   /* repcode */
                    U32 const repCode = offset + (llen==0);
                    if (repCode) {  /* note : if repCode==0, no change */
                        U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
                        if (repCode >= 2) rep[2] = rep[1];
                        rep[1] = rep[0];
                        rep[0] = currentOffset;
                    }
                }

                ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
                ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
                anchor = ip;
        }   }
        ZSTD_setLog2Prices(optStatePtr);
    }   /* while (ip < ilimit) */

    /* Save reps for next block */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }

    /* Return the last literals size */
    return iend - anchor;
}
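/* Illustrative note on the parser above : opt[pos].price is the cheapest cost
 * found so far to emit the first pos bytes after ip, either by extending a
 * literal run or by ending a match at that position. Matches found at each
 * position relax the prices of the positions they can reach, and once the
 * frontier (last_pos) is settled, the reverse traversal re-threads opt[] into
 * the forward chain of sequences that is then stored and fed back into the
 * statistics. */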


size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
}

size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
}

size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
}

size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
}