1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 #include "zstd_compress_internal.h"
13 #include "hist.h"
14 #include "zstd_opt.h"
15
16 #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
17 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
18 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
19
20 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
21 #define ZSTD_MAX_PRICE (1<<30)
22
23 #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
24
25
26 /*-*************************************
27 * Price functions for optimal parser
28 ***************************************/
29
30 #if 0 /* approximation at bit level (for tests) */
31 # define BITCOST_ACCURACY 0
32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
34 #elif 0 /* fractional bit accuracy (for tests) */
35 # define BITCOST_ACCURACY 8
36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
38 #else /* opt==approx, ultra==accurate */
39 # define BITCOST_ACCURACY 8
40 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
41 # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
42 #endif
43
44 /* ZSTD_bitWeight() :
45 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
47 {
48 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
49 }
50
51 /* ZSTD_fracWeight() :
52 * provide fractional-bit "cost" of a stat,
53 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
55 {
56 U32 const stat = rawStat + 1;
57 U32 const hb = ZSTD_highbit32(stat);
58 U32 const BWeight = hb * BITCOST_MULTIPLIER;
59 /* Fweight was meant for "Fractional weight"
60 * but it's effectively a value between 1 and 2
61 * using fixed point arithmetic */
62 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
63 U32 const weight = BWeight + FWeight;
64 assert(hb + BITCOST_ACCURACY < 31);
65 return weight;
66 }
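/* Illustration : with the default BITCOST_ACCURACY==8 (BITCOST_MULTIPLIER==256),
 * ZSTD_fracWeight() interpolates linearly between powers of 2 :
 *   rawStat=3 -> stat=4, hb=2 : 512 + 256 =  768  (3.0 in BITCOST_MULTIPLIER units)
 *   rawStat=5 -> stat=6, hb=2 : 512 + 384 =  896  (3.5)
 *   rawStat=7 -> stat=8, hb=3 : 768 + 256 = 1024  (4.0)
 * The absolute value is offset from a true log2, but the dynamic prices below
 * always combine weights as WEIGHT(sum) - WEIGHT(freq), so only the difference matters. */
#if 0   /* quick check of the values above (illustration only, never compiled) */
static void ZSTD_fracWeight_example(void)   /* hypothetical helper, not part of zstd */
{
    assert(ZSTD_fracWeight(3) == 768);
    assert(ZSTD_fracWeight(5) == 896);
    assert(ZSTD_fracWeight(7) == 1024);
}
#endif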
67
68 #if (DEBUGLEVEL>=2)
69 /* debugging function,
70 * @return price in bytes as fractional value
71 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
73 {
74 return (double)price / (BITCOST_MULTIPLIER*8);
75 }
76 #endif
77
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
79 {
80 return optPtr->literalCompressionMode != ZSTD_ps_disable;
81 }
82
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
84 {
85 if (ZSTD_compressedLiterals(optPtr))
86 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
87 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
88 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
89 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
90 }
91
92
static U32 sum_u32(const unsigned table[], size_t nbElts)
94 {
95 size_t n;
96 U32 total = 0;
97 for (n=0; n<nbElts; n++) {
98 total += table[n];
99 }
100 return total;
101 }
102
103 typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
104
105 static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
107 {
108 U32 s, sum=0;
109 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
110 (unsigned)lastEltIndex+1, (unsigned)shift );
111 assert(shift < 30);
112 for (s=0; s<lastEltIndex+1; s++) {
113 unsigned const base = base1 ? 1 : (table[s]>0);
114 unsigned const newStat = base + (table[s] >> shift);
115 sum += newStat;
116 table[s] = newStat;
117 }
118 return sum;
119 }
120
121 /* ZSTD_scaleStats() :
122 * reduce all elt frequencies in table if sum too large
123 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
125 {
126 U32 const prevsum = sum_u32(table, lastEltIndex+1);
127 U32 const factor = prevsum >> logTarget;
128 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
129 assert(logTarget < 30);
130 if (factor <= 1) return prevsum;
131 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
132 }
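/* Example : with logTarget==11 (target sum ~2K) and a previous sum of 9000,
 * factor = 9000>>11 = 4, hence shift = ZSTD_highbit32(4) = 2 :
 * every count becomes 1 + (count>>2), e.g. {40, 8, 0, 2} -> {11, 3, 1, 1}.
 * The base_1guaranteed "+1" keeps every frequency non-zero, so each symbol
 * keeps a finite price even if it never occurred in previous blocks. */
#if 0   /* sketch of the example above (illustration only, never compiled) */
static void ZSTD_scaleStats_example(void)   /* hypothetical helper, not part of zstd */
{
    unsigned freqs[4] = { 40, 8, 0, 2 };
    U32 const newSum = ZSTD_downscaleStats(freqs, 3 /*lastEltIndex*/, 2 /*shift*/, base_1guaranteed);
    assert(newSum == 11 + 3 + 1 + 1);
}
#endif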
133
134 /* ZSTD_rescaleFreqs() :
135 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
136 * take hints from dictionary if there is one
137 * and init from zero if there is none,
138 * using src for literals stats, and baseline stats for sequence symbols
139 * otherwise downscale existing stats, to be used as seed for next block.
140 */
141 static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
143 const BYTE* const src, size_t const srcSize,
144 int const optLevel)
145 {
146 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
147 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
148 optPtr->priceType = zop_dynamic;
149
150 if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
151
152 /* heuristic: use pre-defined stats for too small inputs */
153 if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
154 DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
155 optPtr->priceType = zop_predef;
156 }
157
158 assert(optPtr->symbolCosts != NULL);
159 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
160
161 /* huffman stats covering the full value set : table presumed generated by dictionary */
162 optPtr->priceType = zop_dynamic;
163
164 if (compressedLiterals) {
165 /* generate literals statistics from huffman table */
166 unsigned lit;
167 assert(optPtr->litFreq != NULL);
168 optPtr->litSum = 0;
169 for (lit=0; lit<=MaxLit; lit++) {
170 U32 const scaleLog = 11; /* scale to 2K */
171 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
172 assert(bitCost <= scaleLog);
173 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
174 optPtr->litSum += optPtr->litFreq[lit];
175 } }
176
177 { unsigned ll;
178 FSE_CState_t llstate;
179 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
180 optPtr->litLengthSum = 0;
181 for (ll=0; ll<=MaxLL; ll++) {
182 U32 const scaleLog = 10; /* scale to 1K */
183 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
184 assert(bitCost < scaleLog);
185 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
186 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
187 } }
188
189 { unsigned ml;
190 FSE_CState_t mlstate;
191 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
192 optPtr->matchLengthSum = 0;
193 for (ml=0; ml<=MaxML; ml++) {
194 U32 const scaleLog = 10;
195 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
196 assert(bitCost < scaleLog);
197 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
198 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
199 } }
200
201 { unsigned of;
202 FSE_CState_t ofstate;
203 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
204 optPtr->offCodeSum = 0;
205 for (of=0; of<=MaxOff; of++) {
206 U32 const scaleLog = 10;
207 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
208 assert(bitCost < scaleLog);
209 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
210 optPtr->offCodeSum += optPtr->offCodeFreq[of];
211 } }
212
213 } else { /* first block, no dictionary */
214
215 assert(optPtr->litFreq != NULL);
216 if (compressedLiterals) {
217 /* base initial cost of literals on direct frequency within src */
218 unsigned lit = MaxLit;
219 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
220 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
221 }
222
223 { unsigned const baseLLfreqs[MaxLL+1] = {
224 4, 2, 1, 1, 1, 1, 1, 1,
225 1, 1, 1, 1, 1, 1, 1, 1,
226 1, 1, 1, 1, 1, 1, 1, 1,
227 1, 1, 1, 1, 1, 1, 1, 1,
228 1, 1, 1, 1
229 };
230 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
231 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
232 }
233
234 { unsigned ml;
235 for (ml=0; ml<=MaxML; ml++)
236 optPtr->matchLengthFreq[ml] = 1;
237 }
238 optPtr->matchLengthSum = MaxML+1;
239
240 { unsigned const baseOFCfreqs[MaxOff+1] = {
241 6, 2, 1, 1, 2, 3, 4, 4,
242 4, 3, 2, 1, 1, 1, 1, 1,
243 1, 1, 1, 1, 1, 1, 1, 1,
244 1, 1, 1, 1, 1, 1, 1, 1
245 };
246 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
247 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
248 }
249
250 }
251
252 } else { /* new block : scale down accumulated statistics */
253
254 if (compressedLiterals)
255 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
256 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
257 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
258 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
259 }
260
261 ZSTD_setBasePrices(optPtr, optLevel);
262 }
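/* Example : when a dictionary provides a valid Huffman table, a literal coded
 * in 3 bits is seeded with litFreq = 1 << (11-3) = 256, an 11-bit literal with
 * litFreq = 1. For a complete prefix code these pseudo-frequencies sum to ~2048,
 * so the dynamic price WEIGHT(litSum) - WEIGHT(litFreq) ~= 11 - 8 = 3 bits :
 * the first block starts out pricing literals at roughly their dictionary cost,
 * and ZSTD_updateStats() then adapts the tables to the actual input. */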
263
264 /* ZSTD_rawLiteralsCost() :
* price of literals (only) in specified segment (whose length can be 0).
266 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
268 const optState_t* const optPtr,
269 int optLevel)
270 {
271 DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
272 if (litLength == 0) return 0;
273
274 if (!ZSTD_compressedLiterals(optPtr))
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
276
277 if (optPtr->priceType == zop_predef)
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistic used */
279
280 /* dynamic statistics */
281 { U32 price = optPtr->litSumBasePrice * litLength;
282 U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
283 U32 u;
284 assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
285 for (u=0; u < litLength; u++) {
286 U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
287 if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
288 price -= litPrice;
289 }
290 return price;
291 }
292 }
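/* Example : with litSum ~= 2048 and litFreq[lit] == 256, one such literal costs about
 * WEIGHT(2048) - WEIGHT(256) ~= 11 - 8 = 3 bits. The litPriceMax clamp guarantees that
 * even a very frequent literal is charged at least 1 bit (BITCOST_MULTIPLIER),
 * so long literal runs never look completely free to the parser. */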
293
294 /* ZSTD_litLengthPrice() :
295 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
297 {
298 assert(litLength <= ZSTD_BLOCKSIZE_MAX);
299 if (optPtr->priceType == zop_predef)
300 return WEIGHT(litLength, optLevel);
301
302 /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
303 * because it isn't representable in the zstd format.
304 * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
305 * In such a case, the block would be all literals.
306 */
307 if (litLength == ZSTD_BLOCKSIZE_MAX)
308 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
309
310 /* dynamic statistics */
311 { U32 const llCode = ZSTD_LLcode(litLength);
312 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
313 + optPtr->litLengthSumBasePrice
314 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
315 }
316 }
317
318 /* ZSTD_getMatchPrice() :
319 * Provides the cost of the match part (offset + matchLength) of a sequence.
320 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
321 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
322 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
323 */
324 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
326 U32 const matchLength,
327 const optState_t* const optPtr,
328 int const optLevel)
329 {
330 U32 price;
331 U32 const offCode = ZSTD_highbit32(offBase);
332 U32 const mlBase = matchLength - MINMATCH;
333 assert(matchLength >= MINMATCH);
334
335 if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
336 return WEIGHT(mlBase, optLevel)
337 + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
338
339 /* dynamic statistics */
340 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
341 if ((optLevel<2) /*static*/ && offCode >= 20)
342 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
343
344 /* match Length */
345 { U32 const mlCode = ZSTD_MLcode(mlBase);
346 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
347 }
348
349 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
350
351 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
352 return price;
353 }
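/* Reminder on @offBase (numeric representation of ZSTD_storeSeq()) :
 * repcodes occupy offBase 1..ZSTD_REP_NUM, real offsets start at ZSTD_REP_NUM+1,
 * so offCode = ZSTD_highbit32(offBase) grows by ~1 per doubling of match distance.
 * Rough example : a repcode (offBase <= 3) adds 0-1 raw offset bits, while a ~1 MB
 * distance (offCode ~= 20) adds ~20 raw bits, plus the entropy cost of the offCode
 * symbol itself, plus the long-distance handicap when optLevel < 2. */
#if 0   /* illustration only, never compiled */
static void ZSTD_offBase_example(void)   /* hypothetical helper, not part of zstd */
{
    assert(REPCODE_TO_OFFBASE(1) == 1);                   /* repcode 1 */
    assert(OFFSET_TO_OFFBASE(1)  == ZSTD_REP_NUM + 1);    /* smallest real offset */
}
#endif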
354
355 /* ZSTD_updateStats() :
356 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
358 U32 litLength, const BYTE* literals,
359 U32 offBase, U32 matchLength)
360 {
361 /* literals */
362 if (ZSTD_compressedLiterals(optPtr)) {
363 U32 u;
364 for (u=0; u < litLength; u++)
365 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
366 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
367 }
368
369 /* literal Length */
370 { U32 const llCode = ZSTD_LLcode(litLength);
371 optPtr->litLengthFreq[llCode]++;
372 optPtr->litLengthSum++;
373 }
374
375 /* offset code : follows storeSeq() numeric representation */
376 { U32 const offCode = ZSTD_highbit32(offBase);
377 assert(offCode <= MaxOff);
378 optPtr->offCodeFreq[offCode]++;
379 optPtr->offCodeSum++;
380 }
381
382 /* match Length */
383 { U32 const mlBase = matchLength - MINMATCH;
384 U32 const mlCode = ZSTD_MLcode(mlBase);
385 optPtr->matchLengthFreq[mlCode]++;
386 optPtr->matchLengthSum++;
387 }
388 }
389
390
391 /* ZSTD_readMINMATCH() :
392 * function safe only for comparisons
393 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
395 {
396 switch (length)
397 {
398 default :
399 case 4 : return MEM_read32(memPtr);
400 case 3 : if (MEM_isLittleEndian())
401 return MEM_read32(memPtr)<<8;
402 else
403 return MEM_read32(memPtr)>>8;
404 }
405 }
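/* Typical use : cheap "do the first minMatch bytes agree ?" test, e.g.
 *   if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { ... }
 * For length==3, the shift discards the 4th byte of the 32-bit read, so only the
 * 3-byte prefixes take part in the comparison ; the returned value is endian- and
 * shift-dependent, hence only meaningful for equality tests, never as a number. */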
406
407
408 /* Update hashTable3 up to ip (excluded)
409 Assumption : always within prefix (i.e. not within extDict) */
410 static
411 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
413 U32* nextToUpdate3,
414 const BYTE* const ip)
415 {
416 U32* const hashTable3 = ms->hashTable3;
417 U32 const hashLog3 = ms->hashLog3;
418 const BYTE* const base = ms->window.base;
419 U32 idx = *nextToUpdate3;
420 U32 const target = (U32)(ip - base);
421 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
422 assert(hashLog3 > 0);
423
424 while(idx < target) {
425 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
426 idx++;
427 }
428
429 *nextToUpdate3 = target;
430 return hashTable3[hash3];
431 }
432
433
434 /*-*************************************
435 * Binary Tree search
436 ***************************************/
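/* The chainTable doubles as a binary tree of suffixes, stored as a rolling buffer :
 * position 'pos' owns the two cells bt[2*(pos & btMask)] and bt[2*(pos & btMask) + 1],
 * pointing to the sub-trees of positions whose suffix sorts respectively before and
 * after the suffix starting at 'pos'. Positions older than btLow have been overwritten,
 * which is why every descent below stops as soon as matchIndex <= btLow. */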
437 /* ZSTD_insertBt1() : add one or multiple positions to tree.
438 * @param ip assumed <= iend-8 .
439 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
440 * @return : nb of positions added */
441 static
442 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
444 const ZSTD_MatchState_t* ms,
445 const BYTE* const ip, const BYTE* const iend,
446 U32 const target,
447 U32 const mls, const int extDict)
448 {
449 const ZSTD_compressionParameters* const cParams = &ms->cParams;
450 U32* const hashTable = ms->hashTable;
451 U32 const hashLog = cParams->hashLog;
452 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
453 U32* const bt = ms->chainTable;
454 U32 const btLog = cParams->chainLog - 1;
455 U32 const btMask = (1 << btLog) - 1;
456 U32 matchIndex = hashTable[h];
457 size_t commonLengthSmaller=0, commonLengthLarger=0;
458 const BYTE* const base = ms->window.base;
459 const BYTE* const dictBase = ms->window.dictBase;
460 const U32 dictLimit = ms->window.dictLimit;
461 const BYTE* const dictEnd = dictBase + dictLimit;
462 const BYTE* const prefixStart = base + dictLimit;
463 const BYTE* match;
464 const U32 curr = (U32)(ip-base);
465 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
466 U32* smallerPtr = bt + 2*(curr&btMask);
467 U32* largerPtr = smallerPtr + 1;
468 U32 dummy32; /* to be nullified at the end */
469 /* windowLow is based on target because
470 * we only need positions that will be in the window at the end of the tree update.
471 */
472 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
473 U32 matchEndIdx = curr+8+1;
474 size_t bestLength = 8;
475 U32 nbCompares = 1U << cParams->searchLog;
476 #ifdef ZSTD_C_PREDICT
477 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
478 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
479 predictedSmall += (predictedSmall>0);
480 predictedLarge += (predictedLarge>0);
481 #endif /* ZSTD_C_PREDICT */
482
483 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
484
485 assert(curr <= target);
486 assert(ip <= iend-8); /* required for h calculation */
487 hashTable[h] = curr; /* Update Hash Table */
488
489 assert(windowLow > 0);
490 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
491 U32* const nextPtr = bt + 2*(matchIndex & btMask);
492 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
493 assert(matchIndex < curr);
494
495 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
496 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
497 if (matchIndex == predictedSmall) {
498 /* no need to check length, result known */
499 *smallerPtr = matchIndex;
500 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
501 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
502 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
503 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
504 continue;
505 }
506 if (matchIndex == predictedLarge) {
507 *largerPtr = matchIndex;
508 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
509 largerPtr = nextPtr;
510 matchIndex = nextPtr[0];
511 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
512 continue;
513 }
514 #endif
515
516 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
517 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
518 match = base + matchIndex;
519 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
520 } else {
521 match = dictBase + matchIndex;
522 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
523 if (matchIndex+matchLength >= dictLimit)
524 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
525 }
526
527 if (matchLength > bestLength) {
528 bestLength = matchLength;
529 if (matchLength > matchEndIdx - matchIndex)
530 matchEndIdx = matchIndex + (U32)matchLength;
531 }
532
533 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
534 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
535 }
536
537 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
538 /* match is smaller than current */
539 *smallerPtr = matchIndex; /* update smaller idx */
540 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
541 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
542 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
543 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
544 } else {
545 /* match is larger than current */
546 *largerPtr = matchIndex;
547 commonLengthLarger = matchLength;
548 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
549 largerPtr = nextPtr;
550 matchIndex = nextPtr[0];
551 } }
552
553 *smallerPtr = *largerPtr = 0;
554 { U32 positions = 0;
555 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
556 assert(matchEndIdx > curr + 8);
557 return MAX(positions, matchEndIdx - (curr + 8));
558 }
559 }
560
561 FORCE_INLINE_TEMPLATE
562 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
564 ZSTD_MatchState_t* ms,
565 const BYTE* const ip, const BYTE* const iend,
566 const U32 mls, const ZSTD_dictMode_e dictMode)
567 {
568 const BYTE* const base = ms->window.base;
569 U32 const target = (U32)(ip - base);
570 U32 idx = ms->nextToUpdate;
571 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
572 idx, target, dictMode);
573
574 while(idx < target) {
575 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
576 assert(idx < (U32)(idx + forward));
577 idx += forward;
578 }
579 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
580 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
581 ms->nextToUpdate = target;
582 }
583
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
585 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
586 }
587
588 FORCE_INLINE_TEMPLATE
589 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
590 U32
ZSTD_insertBtAndGetAllMatches (
592 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
593 ZSTD_MatchState_t* ms,
594 U32* nextToUpdate3,
595 const BYTE* const ip, const BYTE* const iLimit,
596 const ZSTD_dictMode_e dictMode,
597 const U32 rep[ZSTD_REP_NUM],
598 const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
599 const U32 lengthToBeat,
600 const U32 mls /* template */)
601 {
602 const ZSTD_compressionParameters* const cParams = &ms->cParams;
603 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
604 const BYTE* const base = ms->window.base;
605 U32 const curr = (U32)(ip-base);
606 U32 const hashLog = cParams->hashLog;
607 U32 const minMatch = (mls==3) ? 3 : 4;
608 U32* const hashTable = ms->hashTable;
609 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
610 U32 matchIndex = hashTable[h];
611 U32* const bt = ms->chainTable;
612 U32 const btLog = cParams->chainLog - 1;
613 U32 const btMask= (1U << btLog) - 1;
614 size_t commonLengthSmaller=0, commonLengthLarger=0;
615 const BYTE* const dictBase = ms->window.dictBase;
616 U32 const dictLimit = ms->window.dictLimit;
617 const BYTE* const dictEnd = dictBase + dictLimit;
618 const BYTE* const prefixStart = base + dictLimit;
619 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
620 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
621 U32 const matchLow = windowLow ? windowLow : 1;
622 U32* smallerPtr = bt + 2*(curr&btMask);
623 U32* largerPtr = bt + 2*(curr&btMask) + 1;
624 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
625 U32 dummy32; /* to be nullified at the end */
626 U32 mnum = 0;
627 U32 nbCompares = 1U << cParams->searchLog;
628
629 const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
630 const ZSTD_compressionParameters* const dmsCParams =
631 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
632 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
633 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
634 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
635 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
636 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
637 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
638 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
639 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
640 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
641
642 size_t bestLength = lengthToBeat-1;
643 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
644
645 /* check repCode */
646 assert(ll0 <= 1); /* necessarily 1 or 0 */
647 { U32 const lastR = ZSTD_REP_NUM + ll0;
648 U32 repCode;
649 for (repCode = ll0; repCode < lastR; repCode++) {
650 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
651 U32 const repIndex = curr - repOffset;
652 U32 repLen = 0;
653 assert(curr >= dictLimit);
654 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
655 /* We must validate the repcode offset because when we're using a dictionary the
656 * valid offset range shrinks when the dictionary goes out of bounds.
657 */
658 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
659 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
660 }
661 } else { /* repIndex < dictLimit || repIndex >= curr */
662 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
663 dmsBase + repIndex - dmsIndexDelta :
664 dictBase + repIndex;
665 assert(curr >= windowLow);
666 if ( dictMode == ZSTD_extDict
667 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
668 & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
669 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
670 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
671 }
672 if (dictMode == ZSTD_dictMatchState
673 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
674 & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
675 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
676 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
677 } }
678 /* save longer solution */
679 if (repLen > bestLength) {
680 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
681 repCode, ll0, repOffset, repLen);
682 bestLength = repLen;
683 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
684 matches[mnum].len = (U32)repLen;
685 mnum++;
686 if ( (repLen > sufficient_len)
687 | (ip+repLen == iLimit) ) { /* best possible */
688 return mnum;
689 } } } }
690
691 /* HC3 match finder */
692 if ((mls == 3) /*static*/ && (bestLength < mls)) {
693 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
694 if ((matchIndex3 >= matchLow)
695 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
696 size_t mlen;
697 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
698 const BYTE* const match = base + matchIndex3;
699 mlen = ZSTD_count(ip, match, iLimit);
700 } else {
701 const BYTE* const match = dictBase + matchIndex3;
702 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
703 }
704
705 /* save best solution */
706 if (mlen >= mls /* == 3 > bestLength */) {
707 DEBUGLOG(8, "found small match with hlog3, of length %u",
708 (U32)mlen);
709 bestLength = mlen;
710 assert(curr > matchIndex3);
711 assert(mnum==0); /* no prior solution */
712 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
713 matches[0].len = (U32)mlen;
714 mnum = 1;
715 if ( (mlen > sufficient_len) |
716 (ip+mlen == iLimit) ) { /* best possible length */
717 ms->nextToUpdate = curr+1; /* skip insertion */
718 return 1;
719 } } }
720 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
721 } /* if (mls == 3) */
722
723 hashTable[h] = curr; /* Update Hash Table */
724
725 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
726 U32* const nextPtr = bt + 2*(matchIndex & btMask);
727 const BYTE* match;
728 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
729 assert(curr > matchIndex);
730
731 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
732 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
733 match = base + matchIndex;
734 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
735 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
736 } else {
737 match = dictBase + matchIndex;
738 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
739 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
740 if (matchIndex+matchLength >= dictLimit)
741 match = base + matchIndex; /* prepare for match[matchLength] read */
742 }
743
744 if (matchLength > bestLength) {
745 DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
746 (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
747 assert(matchEndIdx > matchIndex);
748 if (matchLength > matchEndIdx - matchIndex)
749 matchEndIdx = matchIndex + (U32)matchLength;
750 bestLength = matchLength;
751 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
752 matches[mnum].len = (U32)matchLength;
753 mnum++;
754 if ( (matchLength > ZSTD_OPT_NUM)
755 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
756 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
757 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
758 } }
759
760 if (match[matchLength] < ip[matchLength]) {
761 /* match smaller than current */
762 *smallerPtr = matchIndex; /* update smaller idx */
763 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
764 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
765 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
766 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
767 } else {
768 *largerPtr = matchIndex;
769 commonLengthLarger = matchLength;
770 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
771 largerPtr = nextPtr;
772 matchIndex = nextPtr[0];
773 } }
774
775 *smallerPtr = *largerPtr = 0;
776
777 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
778 if (dictMode == ZSTD_dictMatchState && nbCompares) {
779 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
780 U32 dictMatchIndex = dms->hashTable[dmsH];
781 const U32* const dmsBt = dms->chainTable;
782 commonLengthSmaller = commonLengthLarger = 0;
783 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
784 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
785 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
786 const BYTE* match = dmsBase + dictMatchIndex;
787 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
788 if (dictMatchIndex+matchLength >= dmsHighLimit)
789 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
790
791 if (matchLength > bestLength) {
792 matchIndex = dictMatchIndex + dmsIndexDelta;
793 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
794 (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
795 if (matchLength > matchEndIdx - matchIndex)
796 matchEndIdx = matchIndex + (U32)matchLength;
797 bestLength = matchLength;
798 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
799 matches[mnum].len = (U32)matchLength;
800 mnum++;
801 if ( (matchLength > ZSTD_OPT_NUM)
802 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
803 break; /* drop, to guarantee consistency (miss a little bit of compression) */
804 } }
805
806 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
807 if (match[matchLength] < ip[matchLength]) {
808 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
809 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
810 } else {
811 /* match is larger than current */
812 commonLengthLarger = matchLength;
813 dictMatchIndex = nextPtr[0];
814 } } } /* if (dictMode == ZSTD_dictMatchState) */
815
816 assert(matchEndIdx > curr+8);
817 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
818 return mnum;
819 }
820
821 typedef U32 (*ZSTD_getAllMatchesFn)(
822 ZSTD_match_t*,
823 ZSTD_MatchState_t*,
824 U32*,
825 const BYTE*,
826 const BYTE*,
827 const U32 rep[ZSTD_REP_NUM],
828 U32 const ll0,
829 U32 const lengthToBeat);
830
831 FORCE_INLINE_TEMPLATE
832 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
834 ZSTD_match_t* matches,
835 ZSTD_MatchState_t* ms,
836 U32* nextToUpdate3,
837 const BYTE* ip,
838 const BYTE* const iHighLimit,
839 const U32 rep[ZSTD_REP_NUM],
840 U32 const ll0,
841 U32 const lengthToBeat,
842 const ZSTD_dictMode_e dictMode,
843 const U32 mls)
844 {
845 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
846 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
847 if (ip < ms->window.base + ms->nextToUpdate)
848 return 0; /* skipped area */
849 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
850 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
851 }
852
853 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
854
855 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
856 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
857 ZSTD_match_t* matches, \
858 ZSTD_MatchState_t* ms, \
859 U32* nextToUpdate3, \
860 const BYTE* ip, \
861 const BYTE* const iHighLimit, \
862 const U32 rep[ZSTD_REP_NUM], \
863 U32 const ll0, \
864 U32 const lengthToBeat) \
865 { \
866 return ZSTD_btGetAllMatches_internal( \
867 matches, ms, nextToUpdate3, ip, iHighLimit, \
868 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
869 }
870
871 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
872 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
873 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
874 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
875 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
876
877 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
879 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
880
881 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
882 { \
883 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
884 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
885 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
886 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
887 }
888
889 static ZSTD_getAllMatchesFn
890 ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
891 {
892 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
893 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
894 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
895 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
896 };
897 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
898 assert((U32)dictMode < 3);
899 assert(mls - 3 < 4);
900 return getAllMatchesFns[(int)dictMode][mls - 3];
901 }
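/* For instance, with cParams.minMatch==5 and no dictionary, the selector above returns
 * ZSTD_btGetAllMatches_noDict_5, i.e. the specialization generated by
 * GEN_ZSTD_BT_GET_ALL_MATCHES_(noDict, 5) ; keeping mls and dictMode compile-time
 * constants lets each specialization drop the branches it cannot take. */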
902
903 /* ***********************
904 * LDM helper functions *
905 *************************/
906
907 /* Struct containing info needed to make decision about ldm inclusion */
908 typedef struct {
909 RawSeqStore_t seqStore; /* External match candidates store for this block */
910 U32 startPosInBlock; /* Start position of the current match candidate */
911 U32 endPosInBlock; /* End position of the current match candidate */
912 U32 offset; /* Offset of the match candidate */
913 } ZSTD_optLdm_t;
914
915 /* ZSTD_optLdm_skipRawSeqStoreBytes():
916 * Moves forward in @rawSeqStore by @nbBytes,
917 * which will update the fields 'pos' and 'posInSequence'.
918 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
920 {
921 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
922 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
923 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
924 if (currPos >= currSeq.litLength + currSeq.matchLength) {
925 currPos -= currSeq.litLength + currSeq.matchLength;
926 rawSeqStore->pos++;
927 } else {
928 rawSeqStore->posInSequence = currPos;
929 break;
930 }
931 }
932 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
933 rawSeqStore->posInSequence = 0;
934 }
935 }
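/* Example : with seq[pos] = {litLength=5, matchLength=20} and posInSequence==0,
 * skipping 30 bytes consumes that whole 25-byte sequence (pos++), then leaves
 * currPos==5 inside the next sequence, so posInSequence ends up == 5. */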
936
937 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
938 * Calculates the beginning and end of the next match in the current block.
939 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
940 */
941 static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
943 U32 blockBytesRemaining)
944 {
945 rawSeq currSeq;
946 U32 currBlockEndPos;
947 U32 literalsBytesRemaining;
948 U32 matchBytesRemaining;
949
950 /* Setting match end position to MAX to ensure we never use an LDM during this block */
951 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
952 optLdm->startPosInBlock = UINT_MAX;
953 optLdm->endPosInBlock = UINT_MAX;
954 return;
955 }
956 /* Calculate appropriate bytes left in matchLength and litLength
957 * after adjusting based on ldmSeqStore->posInSequence */
958 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
959 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
960 currBlockEndPos = currPosInBlock + blockBytesRemaining;
961 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
962 currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
963 0;
964 matchBytesRemaining = (literalsBytesRemaining == 0) ?
965 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
966 currSeq.matchLength;
967
968 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
969 if (literalsBytesRemaining >= blockBytesRemaining) {
970 optLdm->startPosInBlock = UINT_MAX;
971 optLdm->endPosInBlock = UINT_MAX;
972 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
973 return;
974 }
975
976 /* Matches may be < minMatch by this process. In that case, we will reject them
977 when we are deciding whether or not to add the ldm */
978 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
979 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
980 optLdm->offset = currSeq.offset;
981
982 if (optLdm->endPosInBlock > currBlockEndPos) {
983 /* Match ends after the block ends, we can't use the whole match */
984 optLdm->endPosInBlock = currBlockEndPos;
985 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
986 } else {
987 /* Consume nb of bytes equal to size of sequence left */
988 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
989 }
990 }
991
992 /* ZSTD_optLdm_maybeAddMatch():
993 * Adds a match if it's long enough,
* based on its 'startPosInBlock' and 'endPosInBlock',
995 * into 'matches'. Maintains the correct ordering of 'matches'.
996 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
998 const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
999 U32 minMatch)
1000 {
1001 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1002 /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1003 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1004
1005 /* Ensure that current block position is not outside of the match */
1006 if (currPosInBlock < optLdm->startPosInBlock
1007 || currPosInBlock >= optLdm->endPosInBlock
1008 || candidateMatchLength < minMatch) {
1009 return;
1010 }
1011
1012 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1013 U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1014 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1015 candidateOffBase, candidateMatchLength, currPosInBlock);
1016 matches[*nbMatches].len = candidateMatchLength;
1017 matches[*nbMatches].off = candidateOffBase;
1018 (*nbMatches)++;
1019 }
1020 }
1021
1022 /* ZSTD_optLdm_processMatchCandidate():
1023 * Wrapper function to update ldm seq store and call ldm functions as necessary.
1024 */
1025 static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1027 ZSTD_match_t* matches, U32* nbMatches,
1028 U32 currPosInBlock, U32 remainingBytes,
1029 U32 minMatch)
1030 {
1031 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1032 return;
1033 }
1034
1035 if (currPosInBlock >= optLdm->endPosInBlock) {
1036 if (currPosInBlock > optLdm->endPosInBlock) {
1037 /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1038 * at the end of a match from the ldm seq store, and will often be some bytes
1039 * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1040 */
1041 U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1042 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1043 }
1044 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1045 }
1046 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1047 }
1048
1049
1050 /*-*******************************
1051 * Optimal parser
1052 *********************************/
1053
1054 #if 0 /* debug */
1055
1056 static void
1057 listStats(const U32* table, int lastEltID)
1058 {
1059 int const nbElts = lastEltID + 1;
1060 int enb;
1061 for (enb=0; enb < nbElts; enb++) {
1062 (void)table;
1063 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1064 RAWLOG(2, "%4i,", table[enb]);
1065 }
1066 RAWLOG(2, " \n");
1067 }
1068
1069 #endif
1070
1071 #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1072 #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1073 #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
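/* LL_INCPRICE(_l) is the marginal cost of the literal-length code when a run grows
 * from _l-1 to _l literals (the literal byte itself is charged separately via LIT_PRICE).
 * It can be negative when _l lands on a statistically cheaper length code than _l-1,
 * e.g. when ll==1 is more frequent than ll==0 ; the parser exploits this below when
 * comparing "match then 1 literal" against extending a literal run. */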
1074
1075 FORCE_INLINE_TEMPLATE
1076 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1077 size_t
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1079 SeqStore_t* seqStore,
1080 U32 rep[ZSTD_REP_NUM],
1081 const void* src, size_t srcSize,
1082 const int optLevel,
1083 const ZSTD_dictMode_e dictMode)
1084 {
1085 optState_t* const optStatePtr = &ms->opt;
1086 const BYTE* const istart = (const BYTE*)src;
1087 const BYTE* ip = istart;
1088 const BYTE* anchor = istart;
1089 const BYTE* const iend = istart + srcSize;
1090 const BYTE* const ilimit = iend - 8;
1091 const BYTE* const base = ms->window.base;
1092 const BYTE* const prefixStart = base + ms->window.dictLimit;
1093 const ZSTD_compressionParameters* const cParams = &ms->cParams;
1094
1095 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1096
1097 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1098 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1099 U32 nextToUpdate3 = ms->nextToUpdate;
1100
1101 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1102 ZSTD_match_t* const matches = optStatePtr->matchTable;
1103 ZSTD_optimal_t lastStretch;
1104 ZSTD_optLdm_t optLdm;
1105
1106 ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1107
1108 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1109 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1110 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1111
1112 /* init */
1113 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1114 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1115 assert(optLevel <= 2);
1116 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1117 ip += (ip==prefixStart);
1118
1119 /* Match Loop */
1120 while (ip < ilimit) {
1121 U32 cur, last_pos = 0;
1122
1123 /* find first match */
1124 { U32 const litlen = (U32)(ip - anchor);
1125 U32 const ll0 = !litlen;
1126 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1127 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1128 (U32)(ip-istart), (U32)(iend-ip),
1129 minMatch);
1130 if (!nbMatches) {
1131 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1132 ip++;
1133 continue;
1134 }
1135
1136 /* Match found: let's store this solution, and eventually find more candidates.
1137 * During this forward pass, @opt is used to store stretches,
1138 * defined as "a match followed by N literals".
1139 * Note how this is different from a Sequence, which is "N literals followed by a match".
1140 * Storing stretches allows us to store different match predecessors
1141 * for each literal position part of a literals run. */
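/* For example, if a 4-byte match starting at rPos 0 is retained, opt[4] records it
 * with litlen==0 ; if extending with literals is then cheapest, opt[5] and opt[6]
 * record the same match with litlen 1 and 2. The reverse traversal at _shortestPath
 * later regroups those trailing literals with the *following* match to emit sequences. */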
1142
1143 /* initialize opt[0] */
1144 opt[0].mlen = 0; /* there are only literals so far */
1145 opt[0].litlen = litlen;
1146 /* No need to include the actual price of the literals before the first match
1147 * because it is static for the duration of the forward pass, and is included
1148 * in every subsequent price. But, we include the literal length because
1149 * the cost variation of litlen depends on the value of litlen.
1150 */
1151 opt[0].price = LL_PRICE(litlen);
1152 ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1153 ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1154
1155 /* large match -> immediate encoding */
1156 { U32 const maxML = matches[nbMatches-1].len;
1157 U32 const maxOffBase = matches[nbMatches-1].off;
1158 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1159 nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1160
1161 if (maxML > sufficient_len) {
1162 lastStretch.litlen = 0;
1163 lastStretch.mlen = maxML;
1164 lastStretch.off = maxOffBase;
1165 DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1166 maxML, sufficient_len);
1167 cur = 0;
1168 last_pos = maxML;
1169 goto _shortestPath;
1170 } }
1171
1172 /* set prices for first matches starting position == 0 */
1173 assert(opt[0].price >= 0);
1174 { U32 pos;
1175 U32 matchNb;
1176 for (pos = 1; pos < minMatch; pos++) {
1177 opt[pos].price = ZSTD_MAX_PRICE;
1178 opt[pos].mlen = 0;
1179 opt[pos].litlen = litlen + pos;
1180 }
1181 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1182 U32 const offBase = matches[matchNb].off;
1183 U32 const end = matches[matchNb].len;
1184 for ( ; pos <= end ; pos++ ) {
1185 int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1186 int const sequencePrice = opt[0].price + matchPrice;
1187 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1188 pos, ZSTD_fCost(sequencePrice));
1189 opt[pos].mlen = pos;
1190 opt[pos].off = offBase;
1191 opt[pos].litlen = 0; /* end of match */
1192 opt[pos].price = sequencePrice + LL_PRICE(0);
1193 }
1194 }
1195 last_pos = pos-1;
1196 opt[pos].price = ZSTD_MAX_PRICE;
1197 }
1198 }
1199
1200 /* check further positions */
1201 for (cur = 1; cur <= last_pos; cur++) {
1202 const BYTE* const inr = ip + cur;
1203 assert(cur <= ZSTD_OPT_NUM);
1204 DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1205
1206 /* Fix current position with one literal if cheaper */
1207 { U32 const litlen = opt[cur-1].litlen + 1;
1208 int const price = opt[cur-1].price
1209 + LIT_PRICE(ip+cur-1)
1210 + LL_INCPRICE(litlen);
1211 assert(price < 1000000000); /* overflow check */
1212 if (price <= opt[cur].price) {
1213 ZSTD_optimal_t const prevMatch = opt[cur];
1214 DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1215 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1216 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1217 opt[cur] = opt[cur-1];
1218 opt[cur].litlen = litlen;
1219 opt[cur].price = price;
1220 if ( (optLevel >= 1) /* additional check only for higher modes */
1221 && (prevMatch.litlen == 0) /* replace a match */
1222 && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1223 && LIKELY(ip + cur < iend)
1224 ) {
1225 /* check next position, in case it would be cheaper */
1226 int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1227 int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1228 DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1229 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1230 if ( (with1literal < withMoreLiterals)
1231 && (with1literal < opt[cur+1].price) ) {
1232 /* update offset history - before it disappears */
1233 U32 const prev = cur - prevMatch.mlen;
1234 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1235 assert(cur >= prevMatch.mlen);
1236 DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1237 ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1238 newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1239 opt[cur+1] = prevMatch; /* mlen & offbase */
1240 ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1241 opt[cur+1].litlen = 1;
1242 opt[cur+1].price = with1literal;
1243 if (last_pos < cur+1) last_pos = cur+1;
1244 }
1245 }
1246 } else {
1247 DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1248 (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1249 }
1250 }
1251
1252 /* Offset history is not updated during match comparison.
1253 * Do it here, now that the match is selected and confirmed.
1254 */
1255 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1256 assert(cur >= opt[cur].mlen);
1257 if (opt[cur].litlen == 0) {
1258 /* just finished a match => alter offset history */
1259 U32 const prev = cur - opt[cur].mlen;
1260 Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1261 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1262 }
1263
1264 /* last match must start at a minimum distance of 8 from oend */
1265 if (inr > ilimit) continue;
1266
1267 if (cur == last_pos) break;
1268
1269 if ( (optLevel==0) /*static_test*/
1270 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1271 DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1272 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1273 }
1274
1275 assert(opt[cur].price >= 0);
1276 { U32 const ll0 = (opt[cur].litlen == 0);
1277 int const previousPrice = opt[cur].price;
1278 int const basePrice = previousPrice + LL_PRICE(0);
1279 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1280 U32 matchNb;
1281
1282 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1283 (U32)(inr-istart), (U32)(iend-inr),
1284 minMatch);
1285
1286 if (!nbMatches) {
1287 DEBUGLOG(7, "rPos:%u : no match found", cur);
1288 continue;
1289 }
1290
1291 { U32 const longestML = matches[nbMatches-1].len;
1292 DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1293 (int)(inr-istart), cur, nbMatches, longestML);
1294
1295 if ( (longestML > sufficient_len)
1296 || (cur + longestML >= ZSTD_OPT_NUM)
1297 || (ip + cur + longestML >= iend) ) {
1298 lastStretch.mlen = longestML;
1299 lastStretch.off = matches[nbMatches-1].off;
1300 lastStretch.litlen = 0;
1301 last_pos = cur + longestML;
1302 goto _shortestPath;
1303 } }
1304
1305 /* set prices using matches found at position == cur */
1306 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1307 U32 const offset = matches[matchNb].off;
1308 U32 const lastML = matches[matchNb].len;
1309 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1310 U32 mlen;
1311
1312 DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1313 matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1314
1315 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1316 U32 const pos = cur + mlen;
1317 int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1318
1319 if ((pos > last_pos) || (price < opt[pos].price)) {
1320 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1321 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1322 while (last_pos < pos) {
1323 /* fill empty positions, for future comparisons */
1324 last_pos++;
1325 opt[last_pos].price = ZSTD_MAX_PRICE;
1326 opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
1327 }
1328 opt[pos].mlen = mlen;
1329 opt[pos].off = offset;
1330 opt[pos].litlen = 0;
1331 opt[pos].price = price;
1332 } else {
1333 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1334 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1335 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1336 }
1337 } } }
1338 opt[last_pos+1].price = ZSTD_MAX_PRICE;
1339 } /* for (cur = 1; cur <= last_pos; cur++) */
1340
1341 lastStretch = opt[last_pos];
1342 assert(cur >= lastStretch.mlen);
1343 cur = last_pos - lastStretch.mlen;
1344
1345 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1346 assert(opt[0].mlen == 0);
1347 assert(last_pos >= lastStretch.mlen);
1348 assert(cur == last_pos - lastStretch.mlen);
1349
1350 if (lastStretch.mlen==0) {
1351 /* no solution : all matches have been converted into literals */
1352 assert(lastStretch.litlen == (ip - anchor) + last_pos);
1353 ip += last_pos;
1354 continue;
1355 }
1356 assert(lastStretch.off > 0);
1357
1358 /* Update offset history */
1359 if (lastStretch.litlen == 0) {
1360 /* finishing on a match : update offset history */
1361 Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1362 ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1363 } else {
1364 ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1365 assert(cur >= lastStretch.litlen);
1366 cur -= lastStretch.litlen;
1367 }
1368
1369 /* Let's write the shortest path solution.
1370 * It is stored in @opt in reverse order,
1371 * starting from @storeEnd (==cur+2),
1372 * effectively partially overwriting @opt.
1373 * Content is changed too:
1374 * - So far, @opt stored stretches, aka a match followed by literals
1375 * - Now, it will store sequences, aka literals followed by a match
1376 */
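/* Worked illustration (hypothetical path, for explanation only) :
 * if the selected path ends with the stretches [match A + 3 literals][match B],
 * the reverse traversal below rewrites them, from @storeEnd backwards,
 * into the sequences [3 literals + match B] then [preceding literals + match A],
 * which the store loop further down emits in forward order via ZSTD_storeSeq(). */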
1377 { U32 const storeEnd = cur + 2;
1378 U32 storeStart = storeEnd;
1379 U32 stretchPos = cur;
1380
1381 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1382 last_pos, cur); (void)last_pos;
1383 assert(storeEnd < ZSTD_OPT_SIZE);
1384 DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1385 storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1386 if (lastStretch.litlen > 0) {
1387 /* last "sequence" is unfinished: just a bunch of literals */
1388 opt[storeEnd].litlen = lastStretch.litlen;
1389 opt[storeEnd].mlen = 0;
1390 storeStart = storeEnd-1;
1391 opt[storeStart] = lastStretch;
1392 } else {
1393 opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
1394 storeStart = storeEnd;
1395 }
1396 while (1) {
1397 ZSTD_optimal_t nextStretch = opt[stretchPos];
1398 opt[storeStart].litlen = nextStretch.litlen;
1399 DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1400 opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1401 if (nextStretch.mlen == 0) {
1402 /* reaching beginning of segment */
1403 break;
1404 }
1405 storeStart--;
1406 opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1407 assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1408 stretchPos -= nextStretch.litlen + nextStretch.mlen;
1409 }
1410
1411 /* save sequences */
1412 DEBUGLOG(6, "sending selected sequences into seqStore");
1413 { U32 storePos;
1414 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1415 U32 const llen = opt[storePos].litlen;
1416 U32 const mlen = opt[storePos].mlen;
1417 U32 const offBase = opt[storePos].off;
1418 U32 const advance = llen + mlen;
1419 DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1420 (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1421
1422 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1423 assert(storePos == storeEnd); /* must be last sequence */
1424 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1425 continue; /* will finish */
1426 }
1427
1428 assert(anchor + llen <= iend);
1429 ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1430 ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1431 anchor += advance;
1432 ip = anchor;
1433 } }
1434 DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1435
1436 /* update all costs */
1437 ZSTD_setBasePrices(optStatePtr, optLevel);
1438 }
1439 } /* while (ip < ilimit) */
1440
1441 /* Return the last literals size */
1442 return (size_t)(iend - anchor);
1443 }
1444 #endif /* build exclusions */
1445
1446 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1447 static size_t ZSTD_compressBlock_opt0(
1448 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1449 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1450 {
1451 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1452 }
1453 #endif
1454
1455 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1456 static size_t ZSTD_compressBlock_opt2(
1457 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1458 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1459 {
1460 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1461 }
1462 #endif
1463
1464 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1465 size_t ZSTD_compressBlock_btopt(
1466 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1467 const void* src, size_t srcSize)
1468 {
1469 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1470 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1471 }
1472 #endif
1473
1474
1475
1476
1477 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1478 /* ZSTD_initStats_ultra():
1479 * make a first compression pass, just to seed stats with more accurate starting values.
1480 * only works on first block, with no dictionary and no ldm.
1481 * this function cannot error out, its narrow contract must be respected.
1482 */
1483 static
1484 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1485 void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1486 SeqStore_t* seqStore,
1487 U32 rep[ZSTD_REP_NUM],
1488 const void* src, size_t srcSize)
1489 {
1490 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1491 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1492
1493 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1494 assert(ms->opt.litLengthSum == 0); /* first block */
1495 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1496 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1497 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1498
1499 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1500
1501 /* invalidate first scan from history, only keep entropy stats :
 * rebase the window so that every position indexed by the first pass falls below lowLimit,
 * leaving the match finder empty for the real compression pass while the entropy
 * statistics accumulated into ms->opt are preserved.
 * (a worked illustration follows this function) */
1502 ZSTD_resetSeqStore(seqStore);
1503 ms->window.base -= srcSize;
1504 ms->window.dictLimit += (U32)srcSize;
1505 ms->window.lowLimit = ms->window.dictLimit;
1506 ms->nextToUpdate = ms->window.dictLimit;
1507
1508 }
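
/* Worked illustration of the invalidation performed in ZSTD_initStats_ultra() (generic values):
 * let the first block start at index D (== dictLimit == lowLimit) with length S.
 * The first pass indexes positions [D, D+S).
 * After `base -= S`, the same bytes map to indices [D+S, D+2S),
 * and dictLimit = lowLimit = nextToUpdate = D+S,
 * so every table entry written by the first pass (all < D+S) is now out of the valid range :
 * the second pass starts from a clean match-finder state, keeping only the entropy statistics. */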
1509
1510 size_t ZSTD_compressBlock_btultra(
1511 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1512 const void* src, size_t srcSize)
1513 {
1514 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1515 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1516 }
1517
1518 size_t ZSTD_compressBlock_btultra2(
1519 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1520 const void* src, size_t srcSize)
1521 {
1522 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1523 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1524
1525 /* 2-passes strategy:
1526 * this strategy makes a first pass over the first block to collect statistics,
1527 * which are then used to seed the statistics of the real compression pass.
1528 * After the 1st pass, the function forgets that history and restarts compression of the block from scratch.
1529 * Consequently, this can only work if no data has been previously loaded into the tables,
1530 * aka no dictionary, no prefix, no ldm preprocessing.
1531 * The compression ratio gain is generally small (~0.5% on the first block),
1532 * at a cost of 2x cpu time on the first block.
 * (an illustrative public-API sketch follows this function) */
1533 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1534 if ( (ms->opt.litLengthSum==0) /* first block */
1535 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1536 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1537 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1538 && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1539 ) {
1540 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1541 }
1542
1543 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1544 }
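
/* Illustrative sketch (guarded by #if 0, not compiled as part of this file) :
 * how the btultra2 path above is typically reached from the public API,
 * by requesting the ZSTD_btultra2 strategy explicitly.
 * A high compression level (e.g. 22) generally selects it implicitly as well.
 * The helper name and buffers below are hypothetical, used only for illustration. */
#if 0
#include <zstd.h>   /* ZSTD_createCCtx, ZSTD_CCtx_setParameter, ZSTD_compress2 */

static size_t compress_with_btultra2(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize;
    if (cctx == NULL) return 0;
    /* request the btultra2 strategy; the 2-passes statistics seeding
     * triggers on the first block when the conditions checked above are met */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btultra2);
    cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return ZSTD_isError(cSize) ? 0 : cSize;
}
#endif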
1545 #endif
1546
1547 #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1548 size_t ZSTD_compressBlock_btopt_dictMatchState(
1549 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1550 const void* src, size_t srcSize)
1551 {
1552 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1553 }
1554
1555 size_t ZSTD_compressBlock_btopt_extDict(
1556 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1557 const void* src, size_t srcSize)
1558 {
1559 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1560 }
1561 #endif
1562
1563 #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1564 size_t ZSTD_compressBlock_btultra_dictMatchState(
1565 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1566 const void* src, size_t srcSize)
1567 {
1568 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1569 }
1570
1571 size_t ZSTD_compressBlock_btultra_extDict(
1572 ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1573 const void* src, size_t srcSize)
1574 {
1575 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1576 }
1577 #endif
1578
1579 /* note : no btultra2 variant for extDict nor dictMatchState,
1580 * because btultra2 is not meant to work with dictionaries,
1581 * and its 2-passes statistics seeding only applies to the first block (no prefix) */
1582