xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_opt.c (revision d8a0fe102c0cfdfcd5b818f850eff09d8536c9bc)
1 /*
2  * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #include "zstd_opt.h"
12 #include "zstd_lazy.h"
13 
14 
15 #define ZSTD_LITFREQ_ADD    2
16 #define ZSTD_FREQ_DIV       4
17 #define ZSTD_MAX_PRICE      (1<<30)
18 
19 /*-*************************************
20 *  Price functions for optimal parser
21 ***************************************/
22 static void ZSTD_setLog2Prices(optState_t* optPtr)
23 {
24     optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
25     optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
26     optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
27     optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
28     optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
29 }
30 
31 
32 static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
33 {
34     unsigned u;
35 
36     optPtr->cachedLiterals = NULL;
37     optPtr->cachedPrice = optPtr->cachedLitLength = 0;
38     optPtr->staticPrices = 0;
39 
40     if (optPtr->litLengthSum == 0) {
41         if (srcSize <= 1024) optPtr->staticPrices = 1;
42 
43         assert(optPtr->litFreq!=NULL);
44         for (u=0; u<=MaxLit; u++)
45             optPtr->litFreq[u] = 0;
46         for (u=0; u<srcSize; u++)
47             optPtr->litFreq[src[u]]++;
48 
49         optPtr->litSum = 0;
50         optPtr->litLengthSum = MaxLL+1;
51         optPtr->matchLengthSum = MaxML+1;
52         optPtr->offCodeSum = (MaxOff+1);
53         optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
54 
55         for (u=0; u<=MaxLit; u++) {
56             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
57             optPtr->litSum += optPtr->litFreq[u];
58         }
59         for (u=0; u<=MaxLL; u++)
60             optPtr->litLengthFreq[u] = 1;
61         for (u=0; u<=MaxML; u++)
62             optPtr->matchLengthFreq[u] = 1;
63         for (u=0; u<=MaxOff; u++)
64             optPtr->offCodeFreq[u] = 1;
65     } else {
66         optPtr->matchLengthSum = 0;
67         optPtr->litLengthSum = 0;
68         optPtr->offCodeSum = 0;
69         optPtr->matchSum = 0;
70         optPtr->litSum = 0;
71 
72         for (u=0; u<=MaxLit; u++) {
73             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
74             optPtr->litSum += optPtr->litFreq[u];
75         }
76         for (u=0; u<=MaxLL; u++) {
77             optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
78             optPtr->litLengthSum += optPtr->litLengthFreq[u];
79         }
80         for (u=0; u<=MaxML; u++) {
81             optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
82             optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
83             optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
84         }
85         optPtr->matchSum *= ZSTD_LITFREQ_ADD;
86         for (u=0; u<=MaxOff; u++) {
87             optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
88             optPtr->offCodeSum += optPtr->offCodeFreq[u];
89         }
90     }
91 
92     ZSTD_setLog2Prices(optPtr);
93 }
94 
95 
96 static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
97 {
98     U32 price, u;
99 
100     if (optPtr->staticPrices)
101         return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
102 
103     if (litLength == 0)
104         return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);
105 
106     /* literals */
107     if (optPtr->cachedLiterals == literals) {
108         U32 const additional = litLength - optPtr->cachedLitLength;
109         const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
110         price = optPtr->cachedPrice + additional * optPtr->log2litSum;
111         for (u=0; u < additional; u++)
112             price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
113         optPtr->cachedPrice = price;
114         optPtr->cachedLitLength = litLength;
115     } else {
116         price = litLength * optPtr->log2litSum;
117         for (u=0; u < litLength; u++)
118             price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
119 
120         if (litLength >= 12) {
121             optPtr->cachedLiterals = literals;
122             optPtr->cachedPrice = price;
123             optPtr->cachedLitLength = litLength;
124         }
125     }
126 
127     /* literal Length */
128     {   const BYTE LL_deltaCode = 19;
129         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
130         price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
131     }
132 
133     return price;
134 }
135 
136 
137 FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
138 {
139     /* offset */
140     U32 price;
141     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
142 
143     if (optPtr->staticPrices)
144         return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
145 
146     price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
147     if (!ultra && offCode >= 20) price += (offCode-19)*2;
148 
149     /* match Length */
150     {   const BYTE ML_deltaCode = 36;
151         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
152         price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
153     }
154 
155     return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
156 }
157 
158 
159 static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
160 {
161     U32 u;
162 
163     /* literals */
164     optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
165     for (u=0; u < litLength; u++)
166         optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
167 
168     /* literal Length */
169     {   const BYTE LL_deltaCode = 19;
170         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
171         optPtr->litLengthFreq[llCode]++;
172         optPtr->litLengthSum++;
173     }
174 
175     /* match offset */
176     {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
177         optPtr->offCodeSum++;
178         optPtr->offCodeFreq[offCode]++;
179     }
180 
181     /* match Length */
182     {   const BYTE ML_deltaCode = 36;
183         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
184         optPtr->matchLengthFreq[mlCode]++;
185         optPtr->matchLengthSum++;
186     }
187 
188     ZSTD_setLog2Prices(optPtr);
189 }
190 
191 
192 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
193     {                                                 \
194         while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
195         opt[pos].mlen = mlen_;                         \
196         opt[pos].off = offset_;                        \
197         opt[pos].litlen = litlen_;                     \
198         opt[pos].price = price_;                       \
199     }
200 
201 
202 /* function safe only for comparisons */
203 static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
204 {
205     switch (length)
206     {
207     default :
208     case 4 : return MEM_read32(memPtr);
209     case 3 : if (MEM_isLittleEndian())
210                 return MEM_read32(memPtr)<<8;
211              else
212                 return MEM_read32(memPtr)>>8;
213     }
214 }
215 
216 
217 /* Update hashTable3 up to ip (excluded)
218    Assumption : always within prefix (i.e. not within extDict) */
219 static
220 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
221 {
222     U32* const hashTable3  = zc->hashTable3;
223     U32 const hashLog3  = zc->hashLog3;
224     const BYTE* const base = zc->base;
225     U32 idx = zc->nextToUpdate3;
226     const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
227     const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
228 
229     while(idx < target) {
230         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
231         idx++;
232     }
233 
234     return hashTable3[hash3];
235 }
236 
237 
238 /*-*************************************
239 *  Binary Tree search
240 ***************************************/
241 static U32 ZSTD_insertBtAndGetAllMatches (
242                         ZSTD_CCtx* zc,
243                         const BYTE* const ip, const BYTE* const iLimit,
244                         U32 nbCompares, const U32 mls,
245                         U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
246 {
247     const BYTE* const base = zc->base;
248     const U32 current = (U32)(ip-base);
249     const U32 hashLog = zc->appliedParams.cParams.hashLog;
250     const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
251     U32* const hashTable = zc->hashTable;
252     U32 matchIndex  = hashTable[h];
253     U32* const bt   = zc->chainTable;
254     const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
255     const U32 btMask= (1U << btLog) - 1;
256     size_t commonLengthSmaller=0, commonLengthLarger=0;
257     const BYTE* const dictBase = zc->dictBase;
258     const U32 dictLimit = zc->dictLimit;
259     const BYTE* const dictEnd = dictBase + dictLimit;
260     const BYTE* const prefixStart = base + dictLimit;
261     const U32 btLow = btMask >= current ? 0 : current - btMask;
262     const U32 windowLow = zc->lowLimit;
263     U32* smallerPtr = bt + 2*(current&btMask);
264     U32* largerPtr  = bt + 2*(current&btMask) + 1;
265     U32 matchEndIdx = current+8;
266     U32 dummy32;   /* to be nullified at the end */
267     U32 mnum = 0;
268 
269     const U32 minMatch = (mls == 3) ? 3 : 4;
270     size_t bestLength = minMatchLen-1;
271 
272     if (minMatch == 3) { /* HC3 match finder */
273         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
274         if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
275             const BYTE* match;
276             size_t currentMl=0;
277             if ((!extDict) || matchIndex3 >= dictLimit) {
278                 match = base + matchIndex3;
279                 if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
280             } else {
281                 match = dictBase + matchIndex3;
282                 if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
283                     currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
284             }
285 
286             /* save best solution */
287             if (currentMl > bestLength) {
288                 bestLength = currentMl;
289                 matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
290                 matches[mnum].len = (U32)currentMl;
291                 mnum++;
292                 if (currentMl > ZSTD_OPT_NUM) goto update;
293                 if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
294             }
295         }
296     }
297 
298     hashTable[h] = current;   /* Update Hash Table */
299 
300     while (nbCompares-- && (matchIndex > windowLow)) {
301         U32* nextPtr = bt + 2*(matchIndex & btMask);
302         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
303         const BYTE* match;
304 
305         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
306             match = base + matchIndex;
307             if (match[matchLength] == ip[matchLength]) {
308                 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
309             }
310         } else {
311             match = dictBase + matchIndex;
312             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
313             if (matchIndex+matchLength >= dictLimit)
314                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
315         }
316 
317         if (matchLength > bestLength) {
318             if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
319             bestLength = matchLength;
320             matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
321             matches[mnum].len = (U32)matchLength;
322             mnum++;
323             if (matchLength > ZSTD_OPT_NUM) break;
324             if (ip+matchLength == iLimit)   /* equal : no way to know if inf or sup */
325                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
326         }
327 
328         if (match[matchLength] < ip[matchLength]) {
329             /* match is smaller than current */
330             *smallerPtr = matchIndex;             /* update smaller idx */
331             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
332             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
333             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
334             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
335         } else {
336             /* match is larger than current */
337             *largerPtr = matchIndex;
338             commonLengthLarger = matchLength;
339             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
340             largerPtr = nextPtr;
341             matchIndex = nextPtr[0];
342     }   }
343 
344     *smallerPtr = *largerPtr = 0;
345 
346 update:
347     zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
348     return mnum;
349 }
350 
351 
352 /** Tree updater, providing best match */
353 static U32 ZSTD_BtGetAllMatches (
354                         ZSTD_CCtx* zc,
355                         const BYTE* const ip, const BYTE* const iLimit,
356                         const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
357 {
358     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
359     ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
360     return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
361 }
362 
363 
364 static U32 ZSTD_BtGetAllMatches_selectMLS (
365                         ZSTD_CCtx* zc,   /* Index table will be updated */
366                         const BYTE* ip, const BYTE* const iHighLimit,
367                         const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
368 {
369     switch(matchLengthSearch)
370     {
371     case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
372     default :
373     case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
374     case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
375     case 7 :
376     case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
377     }
378 }
379 
380 /** Tree updater, providing best match */
381 static U32 ZSTD_BtGetAllMatches_extDict (
382                         ZSTD_CCtx* zc,
383                         const BYTE* const ip, const BYTE* const iLimit,
384                         const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
385 {
386     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
387     ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
388     return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
389 }
390 
391 
392 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
393                         ZSTD_CCtx* zc,   /* Index table will be updated */
394                         const BYTE* ip, const BYTE* const iHighLimit,
395                         const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
396 {
397     switch(matchLengthSearch)
398     {
399     case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
400     default :
401     case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
402     case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
403     case 7 :
404     case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
405     }
406 }
407 
408 
409 /*-*******************************
410 *  Optimal parser
411 *********************************/
412 FORCE_INLINE_TEMPLATE
413 size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
414                                       const void* src, size_t srcSize, const int ultra)
415 {
416     seqStore_t* seqStorePtr = &(ctx->seqStore);
417     optState_t* optStatePtr = &(ctx->optState);
418     const BYTE* const istart = (const BYTE*)src;
419     const BYTE* ip = istart;
420     const BYTE* anchor = istart;
421     const BYTE* const iend = istart + srcSize;
422     const BYTE* const ilimit = iend - 8;
423     const BYTE* const base = ctx->base;
424     const BYTE* const prefixStart = base + ctx->dictLimit;
425 
426     const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
427     const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
428     const U32 mls = ctx->appliedParams.cParams.searchLength;
429     const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
430 
431     ZSTD_optimal_t* opt = optStatePtr->priceTable;
432     ZSTD_match_t* matches = optStatePtr->matchTable;
433     const BYTE* inr;
434     U32 offset, rep[ZSTD_REP_NUM];
435 
436     /* init */
437     ctx->nextToUpdate3 = ctx->nextToUpdate;
438     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
439     ip += (ip==prefixStart);
440     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
441 
442     /* Match Loop */
443     while (ip < ilimit) {
444         U32 cur, match_num, last_pos, litlen, price;
445         U32 u, mlen, best_mlen, best_off, litLength;
446         memset(opt, 0, sizeof(ZSTD_optimal_t));
447         last_pos = 0;
448         litlen = (U32)(ip - anchor);
449 
450         /* check repCode */
451         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
452             for (i=(ip == anchor); i<last_i; i++) {
453                 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
454                 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
455                     && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
456                     mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
457                     if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
458                         best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
459                         goto _storeSequence;
460                     }
461                     best_off = i - (ip == anchor);
462                     do {
463                         price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
464                         if (mlen > last_pos || price < opt[mlen].price)
465                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
466                         mlen--;
467                     } while (mlen >= minMatch);
468         }   }   }
469 
470         match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
471 
472         if (!last_pos && !match_num) { ip++; continue; }
473 
474         if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
475             best_mlen = matches[match_num-1].len;
476             best_off = matches[match_num-1].off;
477             cur = 0;
478             last_pos = 1;
479             goto _storeSequence;
480         }
481 
482         /* set prices using matches at position = 0 */
483         best_mlen = (last_pos) ? last_pos : minMatch;
484         for (u = 0; u < match_num; u++) {
485             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
486             best_mlen = matches[u].len;
487             while (mlen <= best_mlen) {
488                 price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
489                 if (mlen > last_pos || price < opt[mlen].price)
490                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
491                 mlen++;
492         }   }
493 
494         if (last_pos < minMatch) { ip++; continue; }
495 
496         /* initialize opt[0] */
497         { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
498         opt[0].mlen = 1;
499         opt[0].litlen = litlen;
500 
501          /* check further positions */
502         for (cur = 1; cur <= last_pos; cur++) {
503            inr = ip + cur;
504 
505            if (opt[cur-1].mlen == 1) {
506                 litlen = opt[cur-1].litlen + 1;
507                 if (cur > litlen) {
508                     price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
509                 } else
510                     price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
511            } else {
512                 litlen = 1;
513                 price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
514            }
515 
516            if (cur > last_pos || price <= opt[cur].price)
517                 SET_PRICE(cur, 1, 0, litlen, price);
518 
519            if (cur == last_pos) break;
520 
521            if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
522                continue;
523 
524            mlen = opt[cur].mlen;
525            if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
526                 opt[cur].rep[2] = opt[cur-mlen].rep[1];
527                 opt[cur].rep[1] = opt[cur-mlen].rep[0];
528                 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
529            } else {
530                 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
531                 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
532                 /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1.
533                  * offset ZSTD_REP_MOVE_OPT is used for the special case
534                  * litLength == 0, where offset 0 means something special.
535                  * mlen == 1 means the previous byte was stored as a literal,
536                  * so they are mutually exclusive.
537                  */
538                 assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
539                 opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
540            }
541 
542             best_mlen = minMatch;
543             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
544                 for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
545                     const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
546                     if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
547                        && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
548                        mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
549 
550                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
551                             best_mlen = mlen; best_off = i; last_pos = cur + 1;
552                             goto _storeSequence;
553                        }
554 
555                        best_off = i - (opt[cur].mlen != 1);
556                        if (mlen > best_mlen) best_mlen = mlen;
557 
558                        do {
559                            if (opt[cur].mlen == 1) {
560                                 litlen = opt[cur].litlen;
561                                 if (cur > litlen) {
562                                     price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
563                                 } else
564                                     price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
565                             } else {
566                                 litlen = 0;
567                                 price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
568                             }
569 
570                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
571                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
572                             mlen--;
573                         } while (mlen >= minMatch);
574             }   }   }
575 
576             match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
577 
578             if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
579                 best_mlen = matches[match_num-1].len;
580                 best_off = matches[match_num-1].off;
581                 last_pos = cur + 1;
582                 goto _storeSequence;
583             }
584 
585             /* set prices using matches at position = cur */
586             for (u = 0; u < match_num; u++) {
587                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
588                 best_mlen = matches[u].len;
589 
590                 while (mlen <= best_mlen) {
591                     if (opt[cur].mlen == 1) {
592                         litlen = opt[cur].litlen;
593                         if (cur > litlen)
594                             price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
595                         else
596                             price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
597                     } else {
598                         litlen = 0;
599                         price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
600                     }
601 
602                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
603                         SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
604 
605                     mlen++;
606         }   }   }
607 
608         best_mlen = opt[last_pos].mlen;
609         best_off = opt[last_pos].off;
610         cur = last_pos - best_mlen;
611 
612         /* store sequence */
613 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
614         opt[0].mlen = 1;
615 
616         while (1) {
617             mlen = opt[cur].mlen;
618             offset = opt[cur].off;
619             opt[cur].mlen = best_mlen;
620             opt[cur].off = best_off;
621             best_mlen = mlen;
622             best_off = offset;
623             if (mlen > cur) break;
624             cur -= mlen;
625         }
626 
627         for (u = 0; u <= last_pos;) {
628             u += opt[u].mlen;
629         }
630 
631         for (cur=0; cur < last_pos; ) {
632             mlen = opt[cur].mlen;
633             if (mlen == 1) { ip++; cur++; continue; }
634             offset = opt[cur].off;
635             cur += mlen;
636             litLength = (U32)(ip - anchor);
637 
638             if (offset > ZSTD_REP_MOVE_OPT) {
639                 rep[2] = rep[1];
640                 rep[1] = rep[0];
641                 rep[0] = offset - ZSTD_REP_MOVE_OPT;
642                 offset--;
643             } else {
644                 if (offset != 0) {
645                     best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
646                     if (offset != 1) rep[2] = rep[1];
647                     rep[1] = rep[0];
648                     rep[0] = best_off;
649                 }
650                 if (litLength==0) offset--;
651             }
652 
653             ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
654             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
655             anchor = ip = ip + mlen;
656     }    }   /* for (cur=0; cur < last_pos; ) */
657 
658     /* Save reps for next block */
659     { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
660 
661     /* Return the last literals size */
662     return iend - anchor;
663 }
664 
665 
666 size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
667 {
668     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
669 }
670 
671 size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
672 {
673     return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
674 }
675 
676 
677 FORCE_INLINE_TEMPLATE
678 size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
679                                      const void* src, size_t srcSize, const int ultra)
680 {
681     seqStore_t* seqStorePtr = &(ctx->seqStore);
682     optState_t* optStatePtr = &(ctx->optState);
683     const BYTE* const istart = (const BYTE*)src;
684     const BYTE* ip = istart;
685     const BYTE* anchor = istart;
686     const BYTE* const iend = istart + srcSize;
687     const BYTE* const ilimit = iend - 8;
688     const BYTE* const base = ctx->base;
689     const U32 lowestIndex = ctx->lowLimit;
690     const U32 dictLimit = ctx->dictLimit;
691     const BYTE* const prefixStart = base + dictLimit;
692     const BYTE* const dictBase = ctx->dictBase;
693     const BYTE* const dictEnd  = dictBase + dictLimit;
694 
695     const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
696     const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
697     const U32 mls = ctx->appliedParams.cParams.searchLength;
698     const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
699 
700     ZSTD_optimal_t* opt = optStatePtr->priceTable;
701     ZSTD_match_t* matches = optStatePtr->matchTable;
702     const BYTE* inr;
703 
704     /* init */
705     U32 offset, rep[ZSTD_REP_NUM];
706     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
707 
708     ctx->nextToUpdate3 = ctx->nextToUpdate;
709     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
710     ip += (ip==prefixStart);
711 
712     /* Match Loop */
713     while (ip < ilimit) {
714         U32 cur, match_num, last_pos, litlen, price;
715         U32 u, mlen, best_mlen, best_off, litLength;
716         U32 current = (U32)(ip-base);
717         memset(opt, 0, sizeof(ZSTD_optimal_t));
718         last_pos = 0;
719         opt[0].litlen = (U32)(ip - anchor);
720 
721         /* check repCode */
722         {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
723             for (i = (ip==anchor); i<last_i; i++) {
724                 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
725                 const U32 repIndex = (U32)(current - repCur);
726                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
727                 const BYTE* const repMatch = repBase + repIndex;
728                 if ( (repCur > 0 && repCur <= (S32)current)
729                    && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
730                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
731                     /* repcode detected we should take it */
732                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
733                     mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
734 
735                     if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
736                         best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
737                         goto _storeSequence;
738                     }
739 
740                     best_off = i - (ip==anchor);
741                     litlen = opt[0].litlen;
742                     do {
743                         price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
744                         if (mlen > last_pos || price < opt[mlen].price)
745                             SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
746                         mlen--;
747                     } while (mlen >= minMatch);
748         }   }   }
749 
750         match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch);  /* first search (depth 0) */
751 
752         if (!last_pos && !match_num) { ip++; continue; }
753 
754         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
755         opt[0].mlen = 1;
756 
757         if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
758             best_mlen = matches[match_num-1].len;
759             best_off = matches[match_num-1].off;
760             cur = 0;
761             last_pos = 1;
762             goto _storeSequence;
763         }
764 
765         best_mlen = (last_pos) ? last_pos : minMatch;
766 
767         /* set prices using matches at position = 0 */
768         for (u = 0; u < match_num; u++) {
769             mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
770             best_mlen = matches[u].len;
771             litlen = opt[0].litlen;
772             while (mlen <= best_mlen) {
773                 price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
774                 if (mlen > last_pos || price < opt[mlen].price)
775                     SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
776                 mlen++;
777         }   }
778 
779         if (last_pos < minMatch) {
780             ip++; continue;
781         }
782 
783         /* check further positions */
784         for (cur = 1; cur <= last_pos; cur++) {
785             inr = ip + cur;
786 
787             if (opt[cur-1].mlen == 1) {
788                 litlen = opt[cur-1].litlen + 1;
789                 if (cur > litlen) {
790                     price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
791                 } else
792                     price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
793             } else {
794                 litlen = 1;
795                 price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
796             }
797 
798             if (cur > last_pos || price <= opt[cur].price)
799                 SET_PRICE(cur, 1, 0, litlen, price);
800 
801             if (cur == last_pos) break;
802 
803             if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
804                 continue;
805 
806             mlen = opt[cur].mlen;
807             if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
808                 opt[cur].rep[2] = opt[cur-mlen].rep[1];
809                 opt[cur].rep[1] = opt[cur-mlen].rep[0];
810                 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
811             } else {
812                 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
813                 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
814                 assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
815                 opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
816             }
817 
818             best_mlen = minMatch;
819             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
820                 for (i = (mlen != 1); i<last_i; i++) {
821                     const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
822                     const U32 repIndex = (U32)(current+cur - repCur);
823                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
824                     const BYTE* const repMatch = repBase + repIndex;
825                     if ( (repCur > 0 && repCur <= (S32)(current+cur))
826                       && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
827                       && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
828                         /* repcode detected */
829                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
830                         mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
831 
832                         if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
833                             best_mlen = mlen; best_off = i; last_pos = cur + 1;
834                             goto _storeSequence;
835                         }
836 
837                         best_off = i - (opt[cur].mlen != 1);
838                         if (mlen > best_mlen) best_mlen = mlen;
839 
840                         do {
841                             if (opt[cur].mlen == 1) {
842                                 litlen = opt[cur].litlen;
843                                 if (cur > litlen) {
844                                     price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
845                                 } else
846                                     price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
847                             } else {
848                                 litlen = 0;
849                                 price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
850                             }
851 
852                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
853                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
854                             mlen--;
855                         } while (mlen >= minMatch);
856             }   }   }
857 
858             match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
859 
860             if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
861                 best_mlen = matches[match_num-1].len;
862                 best_off = matches[match_num-1].off;
863                 last_pos = cur + 1;
864                 goto _storeSequence;
865             }
866 
867             /* set prices using matches at position = cur */
868             for (u = 0; u < match_num; u++) {
869                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
870                 best_mlen = matches[u].len;
871 
872                 while (mlen <= best_mlen) {
873                     if (opt[cur].mlen == 1) {
874                         litlen = opt[cur].litlen;
875                         if (cur > litlen)
876                             price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
877                         else
878                             price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
879                     } else {
880                         litlen = 0;
881                         price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
882                     }
883 
884                     if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
885                         SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
886 
887                     mlen++;
888         }   }   }   /* for (cur = 1; cur <= last_pos; cur++) */
889 
890         best_mlen = opt[last_pos].mlen;
891         best_off = opt[last_pos].off;
892         cur = last_pos - best_mlen;
893 
894         /* store sequence */
895 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
896         opt[0].mlen = 1;
897 
898         while (1) {
899             mlen = opt[cur].mlen;
900             offset = opt[cur].off;
901             opt[cur].mlen = best_mlen;
902             opt[cur].off = best_off;
903             best_mlen = mlen;
904             best_off = offset;
905             if (mlen > cur) break;
906             cur -= mlen;
907         }
908 
909         for (u = 0; u <= last_pos; ) {
910             u += opt[u].mlen;
911         }
912 
913         for (cur=0; cur < last_pos; ) {
914             mlen = opt[cur].mlen;
915             if (mlen == 1) { ip++; cur++; continue; }
916             offset = opt[cur].off;
917             cur += mlen;
918             litLength = (U32)(ip - anchor);
919 
920             if (offset > ZSTD_REP_MOVE_OPT) {
921                 rep[2] = rep[1];
922                 rep[1] = rep[0];
923                 rep[0] = offset - ZSTD_REP_MOVE_OPT;
924                 offset--;
925             } else {
926                 if (offset != 0) {
927                     best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
928                     if (offset != 1) rep[2] = rep[1];
929                     rep[1] = rep[0];
930                     rep[0] = best_off;
931                 }
932 
933                 if (litLength==0) offset--;
934             }
935 
936             ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
937             ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
938             anchor = ip = ip + mlen;
939     }    }   /* for (cur=0; cur < last_pos; ) */
940 
941     /* Save reps for next block */
942     { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
943 
944     /* Return the last literals size */
945     return iend - anchor;
946 }
947 
948 
949 size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
950 {
951     return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
952 }
953 
954 size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
955 {
956     return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
957 }
958