1 /*
2 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
14
15
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE (1<<30)
18
19 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20
21
22 /*-*************************************
23 * Price functions for optimal parser
24 ***************************************/
25
26 #if 0 /* approximation at bit level (for tests) */
27 # define BITCOST_ACCURACY 0
28 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0 /* fractional bit accuracy (for tests) */
31 # define BITCOST_ACCURACY 8
32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else /* opt==approx, ultra==accurate */
35 # define BITCOST_ACCURACY 8
36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
39
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41 {
42 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43 }
44
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46 {
47 U32 const stat = rawStat + 1;
48 U32 const hb = ZSTD_highbit32(stat);
49 U32 const BWeight = hb * BITCOST_MULTIPLIER;
50 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51 U32 const weight = BWeight + FWeight;
52 assert(hb + BITCOST_ACCURACY < 31);
53 return weight;
54 }
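/* Illustrative worked example (assuming the default BITCOST_ACCURACY == 8,
 * i.e. BITCOST_MULTIPLIER == 256) :
 *   rawStat = 39  => stat = 40
 *   hb      = ZSTD_highbit32(40)  = 5
 *   BWeight = 5 << 8              = 1280
 *   FWeight = (40 << 8) >> 5      = 320
 *   weight  = 1280 + 320          = 1600   (~6.25 "bits")
 * which approximates (log2(40) + 1) * 256 ~= 1618.
 * Prices are always computed as WEIGHT(sum) - WEIGHT(freq), so the constant
 * "+1 bit" offset cancels out, leaving ~log2(sum/freq) :
 * the Shannon cost of a symbol with frequency freq among sum occurrences. */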
55
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58 * @return price in bytes as fractional value
59 * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
61 {
62 return (double)price / (BITCOST_MULTIPLIER*8);
63 }
64 #endif
65
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
67 {
68 return optPtr->literalCompressionMode != ZSTD_ps_disable;
69 }
70
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
72 {
73 if (ZSTD_compressedLiterals(optPtr))
74 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
78 }
79
80
static U32 sum_u32(const unsigned table[], size_t nbElts)
82 {
83 size_t n;
84 U32 total = 0;
85 for (n=0; n<nbElts; n++) {
86 total += table[n];
87 }
88 return total;
89 }
90
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
92 {
93 U32 s, sum=0;
94 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95 assert(shift < 30);
96 for (s=0; s<lastEltIndex+1; s++) {
97 table[s] = 1 + (table[s] >> shift);
98 sum += table[s];
99 }
100 return sum;
101 }
102
/* ZSTD_scaleStats() :
 * reduce all elements in table if sum is too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107 {
108 U32 const prevsum = sum_u32(table, lastEltIndex+1);
109 U32 const factor = prevsum >> logTarget;
110 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111 assert(logTarget < 30);
112 if (factor <= 1) return prevsum;
113 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114 }
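/* Illustrative example : for a table whose current sum is 9000 and
 * logTarget == 11 (target of ~2K) :
 *   factor = 9000 >> 11         = 4
 *   shift  = ZSTD_highbit32(4)  = 2
 * so ZSTD_downscaleStats() rewrites each cell as 1 + (cell >> 2),
 * bringing the sum back to roughly 9000/4 plus one per cell,
 * while guaranteeing that no frequency ever drops to zero. */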
115
/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics,
 *    taking hints from the dictionary if there is one,
 *    or initializing from scratch if there is none,
 *    using src for literal stats, and baseline stats for sequence symbols ;
 * otherwise : downscale existing stats, to be used as seed for the next block.
 */
123 static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
125 const BYTE* const src, size_t const srcSize,
126 int const optLevel)
127 {
128 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130 optPtr->priceType = zop_dynamic;
131
132 if (optPtr->litLengthSum == 0) { /* first block : init */
133 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135 optPtr->priceType = zop_predef;
136 }
137
138 assert(optPtr->symbolCosts != NULL);
139 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140 /* huffman table presumed generated by dictionary */
141 optPtr->priceType = zop_dynamic;
142
143 if (compressedLiterals) {
144 unsigned lit;
145 assert(optPtr->litFreq != NULL);
146 optPtr->litSum = 0;
147 for (lit=0; lit<=MaxLit; lit++) {
148 U32 const scaleLog = 11; /* scale to 2K */
149 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150 assert(bitCost <= scaleLog);
151 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152 optPtr->litSum += optPtr->litFreq[lit];
153 } }
154
155 { unsigned ll;
156 FSE_CState_t llstate;
157 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158 optPtr->litLengthSum = 0;
159 for (ll=0; ll<=MaxLL; ll++) {
160 U32 const scaleLog = 10; /* scale to 1K */
161 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162 assert(bitCost < scaleLog);
163 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
165 } }
166
167 { unsigned ml;
168 FSE_CState_t mlstate;
169 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170 optPtr->matchLengthSum = 0;
171 for (ml=0; ml<=MaxML; ml++) {
172 U32 const scaleLog = 10;
173 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174 assert(bitCost < scaleLog);
175 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
177 } }
178
179 { unsigned of;
180 FSE_CState_t ofstate;
181 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182 optPtr->offCodeSum = 0;
183 for (of=0; of<=MaxOff; of++) {
184 U32 const scaleLog = 10;
185 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186 assert(bitCost < scaleLog);
187 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188 optPtr->offCodeSum += optPtr->offCodeFreq[of];
189 } }
190
191 } else { /* not a dictionary */
192
193 assert(optPtr->litFreq != NULL);
194 if (compressedLiterals) {
195 unsigned lit = MaxLit;
196 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
198 }
199
200 { unsigned const baseLLfreqs[MaxLL+1] = {
201 4, 2, 1, 1, 1, 1, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1,
203 1, 1, 1, 1, 1, 1, 1, 1,
204 1, 1, 1, 1, 1, 1, 1, 1,
205 1, 1, 1, 1
206 };
207 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
208 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
209 }
210
211 { unsigned ml;
212 for (ml=0; ml<=MaxML; ml++)
213 optPtr->matchLengthFreq[ml] = 1;
214 }
215 optPtr->matchLengthSum = MaxML+1;
216
217 { unsigned const baseOFCfreqs[MaxOff+1] = {
218 6, 2, 1, 1, 2, 3, 4, 4,
219 4, 3, 2, 1, 1, 1, 1, 1,
220 1, 1, 1, 1, 1, 1, 1, 1,
221 1, 1, 1, 1, 1, 1, 1, 1
222 };
223 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
224 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
225 }
226
227
228 }
229
230 } else { /* new block : re-use previous statistics, scaled down */
231
232 if (compressedLiterals)
233 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
234 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
235 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
236 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
237 }
238
239 ZSTD_setBasePrices(optPtr, optLevel);
240 }
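/* Note on the dictionary-seeding path above : Huffman / FSE code lengths from
 * the dictionary tables are converted into pseudo-frequencies, so that the
 * generic WEIGHT() machinery reproduces (approximately) the dictionary's bit costs.
 * Illustrative example, for a literal whose Huffman code is 3 bits long,
 * with scaleLog == 11 :
 *   litFreq[lit] = 1 << (11 - 3) = 256
 * and since litSum ends up close to 2048 (= 1<<11) for a valid table,
 * WEIGHT(litSum) - WEIGHT(256) ~= 3 "bits",
 * roughly the cost the dictionary's table would actually pay for that literal. */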
241
/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (whose length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246 const optState_t* const optPtr,
247 int optLevel)
248 {
249 if (litLength == 0) return 0;
250
251 if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */
253
254 if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */
256
257 /* dynamic statistics */
258 { U32 price = litLength * optPtr->litSumBasePrice;
259 U32 u;
260 for (u=0; u < litLength; u++) {
261 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
262 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
263 }
264 return price;
265 }
266 }
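/* Illustrative example of the dynamic branch above :
 * with litSum == 2048 and litFreq['e'] == 256, each 'e' costs
 *   WEIGHT(2048) - WEIGHT(256) ~= (11 - 8) * BITCOST_MULTIPLIER = 3 "bits",
 * consistent with an entropy coder giving 'e' a probability of 256/2048 = 1/8. */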
267
268 /* ZSTD_litLengthPrice() :
269 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
271 {
272 assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273 if (optPtr->priceType == zop_predef)
274 return WEIGHT(litLength, optLevel);
275 /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276 * because it isn't representable in the zstd format. So instead just
277 * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278 * would be all literals.
279 */
280 if (litLength == ZSTD_BLOCKSIZE_MAX)
281 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
282
283 /* dynamic statistics */
284 { U32 const llCode = ZSTD_LLcode(litLength);
285 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
286 + optPtr->litLengthSumBasePrice
287 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
288 }
289 }
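/* Illustrative example of the dynamic branch : for litLength == 10,
 * ZSTD_LLcode(10) == 10 (codes 0-15 map 1:1 and carry no extra bits),
 * so the price reduces to
 *   litLengthSumBasePrice - WEIGHT(litLengthFreq[10])
 * i.e. ~log2(litLengthSum / litLengthFreq[10]) bits for the symbol itself. */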
290
291 /* ZSTD_getMatchPrice() :
292 * Provides the cost of the match part (offset + matchLength) of a sequence
293 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
294 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
 * @optLevel: when <2, favors small offsets for decompression speed (improved cache efficiency)
296 */
297 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode,
299 U32 const matchLength,
300 const optState_t* const optPtr,
301 int const optLevel)
302 {
303 U32 price;
304 U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
305 U32 const mlBase = matchLength - MINMATCH;
306 assert(matchLength >= MINMATCH);
307
308 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
309 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
310
311 /* dynamic statistics */
312 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
313 if ((optLevel<2) /*static*/ && offCode >= 20)
314 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
315
316 /* match Length */
317 { U32 const mlCode = ZSTD_MLcode(mlBase);
318 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
319 }
320
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
322
323 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
324 return price;
325 }
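/* Illustrative example, following the stored-offset convention documented above
 * (0-2 are repcodes 1-3, real offsets are stored as offset+2) :
 * - repcode 1        : offcode 0    => offCode = ZSTD_highbit32(0+1)    = 0,
 *                      no extra bits, only the statistical cost of symbol 0 ;
 * - real offset 1024 : offcode 1026 => offCode = ZSTD_highbit32(1026+1) = 10,
 *                      10 extra bits plus the statistical cost of symbol 10,
 *                      plus (when optLevel<2) a handicap once offCode >= 20. */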
326
/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
330 U32 litLength, const BYTE* literals,
331 U32 offsetCode, U32 matchLength)
332 {
333 /* literals */
334 if (ZSTD_compressedLiterals(optPtr)) {
335 U32 u;
336 for (u=0; u < litLength; u++)
337 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
338 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
339 }
340
341 /* literal Length */
342 { U32 const llCode = ZSTD_LLcode(litLength);
343 optPtr->litLengthFreq[llCode]++;
344 optPtr->litLengthSum++;
345 }
346
347 /* offset code : expected to follow storeSeq() numeric representation */
348 { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
349 assert(offCode <= MaxOff);
350 optPtr->offCodeFreq[offCode]++;
351 optPtr->offCodeSum++;
352 }
353
354 /* match Length */
355 { U32 const mlBase = matchLength - MINMATCH;
356 U32 const mlCode = ZSTD_MLcode(mlBase);
357 optPtr->matchLengthFreq[mlCode]++;
358 optPtr->matchLengthSum++;
359 }
360 }
361
362
363 /* ZSTD_readMINMATCH() :
364 * function safe only for comparisons
365 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
367 {
368 switch (length)
369 {
370 default :
371 case 4 : return MEM_read32(memPtr);
372 case 3 : if (MEM_isLittleEndian())
373 return MEM_read32(memPtr)<<8;
374 else
375 return MEM_read32(memPtr)>>8;
376 }
377 }
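/* Rationale : for length==3, a full 32-bit word is read, then the byte that
 * lies outside the 3-byte window is shifted out ; which byte that is depends
 * on endianness (the 4th byte is the most significant byte of the load on
 * little-endian targets, the least significant one on big-endian targets).
 * The resulting values are only meaningful for equality comparisons,
 * hence the "safe only for comparisons" restriction above. */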
378
379
380 /* Update hashTable3 up to ip (excluded)
381 Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383 U32* nextToUpdate3,
384 const BYTE* const ip)
385 {
386 U32* const hashTable3 = ms->hashTable3;
387 U32 const hashLog3 = ms->hashLog3;
388 const BYTE* const base = ms->window.base;
389 U32 idx = *nextToUpdate3;
390 U32 const target = (U32)(ip - base);
391 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
392 assert(hashLog3 > 0);
393
394 while(idx < target) {
395 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
396 idx++;
397 }
398
399 *nextToUpdate3 = target;
400 return hashTable3[hash3];
401 }
402
403
404 /*-*************************************
405 * Binary Tree search
406 ***************************************/
407 /** ZSTD_insertBt1() : add one or multiple positions to tree.
408 * @param ip assumed <= iend-8 .
409 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
410 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
412 const ZSTD_matchState_t* ms,
413 const BYTE* const ip, const BYTE* const iend,
414 U32 const target,
415 U32 const mls, const int extDict)
416 {
417 const ZSTD_compressionParameters* const cParams = &ms->cParams;
418 U32* const hashTable = ms->hashTable;
419 U32 const hashLog = cParams->hashLog;
420 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
421 U32* const bt = ms->chainTable;
422 U32 const btLog = cParams->chainLog - 1;
423 U32 const btMask = (1 << btLog) - 1;
424 U32 matchIndex = hashTable[h];
425 size_t commonLengthSmaller=0, commonLengthLarger=0;
426 const BYTE* const base = ms->window.base;
427 const BYTE* const dictBase = ms->window.dictBase;
428 const U32 dictLimit = ms->window.dictLimit;
429 const BYTE* const dictEnd = dictBase + dictLimit;
430 const BYTE* const prefixStart = base + dictLimit;
431 const BYTE* match;
432 const U32 curr = (U32)(ip-base);
433 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
434 U32* smallerPtr = bt + 2*(curr&btMask);
435 U32* largerPtr = smallerPtr + 1;
436 U32 dummy32; /* to be nullified at the end */
437 /* windowLow is based on target because
438 * we only need positions that will be in the window at the end of the tree update.
439 */
440 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
441 U32 matchEndIdx = curr+8+1;
442 size_t bestLength = 8;
443 U32 nbCompares = 1U << cParams->searchLog;
444 #ifdef ZSTD_C_PREDICT
445 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
446 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
447 predictedSmall += (predictedSmall>0);
448 predictedLarge += (predictedLarge>0);
449 #endif /* ZSTD_C_PREDICT */
450
451 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
452
453 assert(curr <= target);
454 assert(ip <= iend-8); /* required for h calculation */
455 hashTable[h] = curr; /* Update Hash Table */
456
457 assert(windowLow > 0);
458 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
459 U32* const nextPtr = bt + 2*(matchIndex & btMask);
460 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
461 assert(matchIndex < curr);
462
463 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
464 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
465 if (matchIndex == predictedSmall) {
466 /* no need to check length, result known */
467 *smallerPtr = matchIndex;
468 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
469 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
470 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
471 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
472 continue;
473 }
474 if (matchIndex == predictedLarge) {
475 *largerPtr = matchIndex;
476 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
477 largerPtr = nextPtr;
478 matchIndex = nextPtr[0];
479 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
480 continue;
481 }
482 #endif
483
484 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
485 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
486 match = base + matchIndex;
487 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
488 } else {
489 match = dictBase + matchIndex;
490 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
491 if (matchIndex+matchLength >= dictLimit)
492 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
493 }
494
495 if (matchLength > bestLength) {
496 bestLength = matchLength;
497 if (matchLength > matchEndIdx - matchIndex)
498 matchEndIdx = matchIndex + (U32)matchLength;
499 }
500
501 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
502 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
503 }
504
505 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
506 /* match is smaller than current */
507 *smallerPtr = matchIndex; /* update smaller idx */
508 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
509 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
510 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
511 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
512 } else {
513 /* match is larger than current */
514 *largerPtr = matchIndex;
515 commonLengthLarger = matchLength;
516 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
517 largerPtr = nextPtr;
518 matchIndex = nextPtr[0];
519 } }
520
521 *smallerPtr = *largerPtr = 0;
522 { U32 positions = 0;
523 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
524 assert(matchEndIdx > curr + 8);
525 return MAX(positions, matchEndIdx - (curr + 8));
526 }
527 }
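/* Note on the return value : the caller (ZSTD_updateTree_internal) advances by
 * the returned number of positions, so the next insertion effectively resumes
 * around matchEndIdx - 8, i.e. slightly before the end of the farthest content
 * referenced by a long match. Illustrative example : a 1000-byte match found at
 * distance 50 gives matchEndIdx ~= curr + 950, so ~942 positions are skipped,
 * which limits the cost of tree insertion inside long repetitive regions. */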
528
529 FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
531 ZSTD_matchState_t* ms,
532 const BYTE* const ip, const BYTE* const iend,
533 const U32 mls, const ZSTD_dictMode_e dictMode)
534 {
535 const BYTE* const base = ms->window.base;
536 U32 const target = (U32)(ip - base);
537 U32 idx = ms->nextToUpdate;
538 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
539 idx, target, dictMode);
540
541 while(idx < target) {
542 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
543 assert(idx < (U32)(idx + forward));
544 idx += forward;
545 }
546 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
547 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
548 ms->nextToUpdate = target;
549 }
550
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
552 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
553 }
554
555 FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
557 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
558 ZSTD_matchState_t* ms,
559 U32* nextToUpdate3,
560 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561 const U32 rep[ZSTD_REP_NUM],
562 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563 const U32 lengthToBeat,
564 U32 const mls /* template */)
565 {
566 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
568 const BYTE* const base = ms->window.base;
569 U32 const curr = (U32)(ip-base);
570 U32 const hashLog = cParams->hashLog;
571 U32 const minMatch = (mls==3) ? 3 : 4;
572 U32* const hashTable = ms->hashTable;
573 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
574 U32 matchIndex = hashTable[h];
575 U32* const bt = ms->chainTable;
576 U32 const btLog = cParams->chainLog - 1;
577 U32 const btMask= (1U << btLog) - 1;
578 size_t commonLengthSmaller=0, commonLengthLarger=0;
579 const BYTE* const dictBase = ms->window.dictBase;
580 U32 const dictLimit = ms->window.dictLimit;
581 const BYTE* const dictEnd = dictBase + dictLimit;
582 const BYTE* const prefixStart = base + dictLimit;
583 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
584 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
585 U32 const matchLow = windowLow ? windowLow : 1;
586 U32* smallerPtr = bt + 2*(curr&btMask);
587 U32* largerPtr = bt + 2*(curr&btMask) + 1;
588 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
589 U32 dummy32; /* to be nullified at the end */
590 U32 mnum = 0;
591 U32 nbCompares = 1U << cParams->searchLog;
592
593 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594 const ZSTD_compressionParameters* const dmsCParams =
595 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
597 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
598 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
599 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
600 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
601 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
602 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
603 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
604 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
605
606 size_t bestLength = lengthToBeat-1;
607 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
608
609 /* check repCode */
610 assert(ll0 <= 1); /* necessarily 1 or 0 */
611 { U32 const lastR = ZSTD_REP_NUM + ll0;
612 U32 repCode;
613 for (repCode = ll0; repCode < lastR; repCode++) {
614 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
615 U32 const repIndex = curr - repOffset;
616 U32 repLen = 0;
617 assert(curr >= dictLimit);
618 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
619 /* We must validate the repcode offset because when we're using a dictionary the
620 * valid offset range shrinks when the dictionary goes out of bounds.
621 */
622 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
623 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
624 }
625 } else { /* repIndex < dictLimit || repIndex >= curr */
626 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
627 dmsBase + repIndex - dmsIndexDelta :
628 dictBase + repIndex;
629 assert(curr >= windowLow);
630 if ( dictMode == ZSTD_extDict
631 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
632 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
633 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
635 }
636 if (dictMode == ZSTD_dictMatchState
637 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
638 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
639 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
641 } }
642 /* save longer solution */
643 if (repLen > bestLength) {
644 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645 repCode, ll0, repOffset, repLen);
646 bestLength = repLen;
647 matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
648 matches[mnum].len = (U32)repLen;
649 mnum++;
650 if ( (repLen > sufficient_len)
651 | (ip+repLen == iLimit) ) { /* best possible */
652 return mnum;
653 } } } }
654
655 /* HC3 match finder */
656 if ((mls == 3) /*static*/ && (bestLength < mls)) {
657 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
658 if ((matchIndex3 >= matchLow)
659 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
660 size_t mlen;
661 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
662 const BYTE* const match = base + matchIndex3;
663 mlen = ZSTD_count(ip, match, iLimit);
664 } else {
665 const BYTE* const match = dictBase + matchIndex3;
666 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
667 }
668
669 /* save best solution */
670 if (mlen >= mls /* == 3 > bestLength */) {
671 DEBUGLOG(8, "found small match with hlog3, of length %u",
672 (U32)mlen);
673 bestLength = mlen;
674 assert(curr > matchIndex3);
675 assert(mnum==0); /* no prior solution */
676 matches[0].off = STORE_OFFSET(curr - matchIndex3);
677 matches[0].len = (U32)mlen;
678 mnum = 1;
679 if ( (mlen > sufficient_len) |
680 (ip+mlen == iLimit) ) { /* best possible length */
681 ms->nextToUpdate = curr+1; /* skip insertion */
682 return 1;
683 } } }
684 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
685 } /* if (mls == 3) */
686
687 hashTable[h] = curr; /* Update Hash Table */
688
689 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
690 U32* const nextPtr = bt + 2*(matchIndex & btMask);
691 const BYTE* match;
692 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
693 assert(curr > matchIndex);
694
695 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
696 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
697 match = base + matchIndex;
698 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
699 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
700 } else {
701 match = dictBase + matchIndex;
702 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
703 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
704 if (matchIndex+matchLength >= dictLimit)
705 match = base + matchIndex; /* prepare for match[matchLength] read */
706 }
707
708 if (matchLength > bestLength) {
709 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
711 assert(matchEndIdx > matchIndex);
712 if (matchLength > matchEndIdx - matchIndex)
713 matchEndIdx = matchIndex + (U32)matchLength;
714 bestLength = matchLength;
715 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
716 matches[mnum].len = (U32)matchLength;
717 mnum++;
718 if ( (matchLength > ZSTD_OPT_NUM)
719 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
721 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
722 } }
723
724 if (match[matchLength] < ip[matchLength]) {
725 /* match smaller than current */
726 *smallerPtr = matchIndex; /* update smaller idx */
727 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
728 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
729 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
730 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
731 } else {
732 *largerPtr = matchIndex;
733 commonLengthLarger = matchLength;
734 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
735 largerPtr = nextPtr;
736 matchIndex = nextPtr[0];
737 } }
738
739 *smallerPtr = *largerPtr = 0;
740
741 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
742 if (dictMode == ZSTD_dictMatchState && nbCompares) {
743 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
744 U32 dictMatchIndex = dms->hashTable[dmsH];
745 const U32* const dmsBt = dms->chainTable;
746 commonLengthSmaller = commonLengthLarger = 0;
747 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
748 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
749 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
750 const BYTE* match = dmsBase + dictMatchIndex;
751 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
752 if (dictMatchIndex+matchLength >= dmsHighLimit)
753 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
754
755 if (matchLength > bestLength) {
756 matchIndex = dictMatchIndex + dmsIndexDelta;
757 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
759 if (matchLength > matchEndIdx - matchIndex)
760 matchEndIdx = matchIndex + (U32)matchLength;
761 bestLength = matchLength;
762 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
763 matches[mnum].len = (U32)matchLength;
764 mnum++;
765 if ( (matchLength > ZSTD_OPT_NUM)
766 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
767 break; /* drop, to guarantee consistency (miss a little bit of compression) */
768 } }
769
770 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
771 if (match[matchLength] < ip[matchLength]) {
772 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
773 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
774 } else {
775 /* match is larger than current */
776 commonLengthLarger = matchLength;
777 dictMatchIndex = nextPtr[0];
778 } } } /* if (dictMode == ZSTD_dictMatchState) */
779
780 assert(matchEndIdx > curr+8);
781 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
782 return mnum;
783 }
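/* Invariant worth noting : matches[] is filled in strictly increasing length
 * order (a candidate is only appended when it beats bestLength), so
 * matches[nbMatches-1].len is always the longest match found. The optimal
 * parser below relies on this ordering, both for its "large match, immediate
 * encoding" shortcut and when pricing match lengths per candidate. */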
784
785 typedef U32 (*ZSTD_getAllMatchesFn)(
786 ZSTD_match_t*,
787 ZSTD_matchState_t*,
788 U32*,
789 const BYTE*,
790 const BYTE*,
791 const U32 rep[ZSTD_REP_NUM],
792 U32 const ll0,
793 U32 const lengthToBeat);
794
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
796 ZSTD_match_t* matches,
797 ZSTD_matchState_t* ms,
798 U32* nextToUpdate3,
799 const BYTE* ip,
800 const BYTE* const iHighLimit,
801 const U32 rep[ZSTD_REP_NUM],
802 U32 const ll0,
803 U32 const lengthToBeat,
804 const ZSTD_dictMode_e dictMode,
805 const U32 mls)
806 {
807 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
808 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
809 if (ip < ms->window.base + ms->nextToUpdate)
810 return 0; /* skipped area */
811 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
812 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
813 }
814
815 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
816
817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
818 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
819 ZSTD_match_t* matches, \
820 ZSTD_matchState_t* ms, \
821 U32* nextToUpdate3, \
822 const BYTE* ip, \
823 const BYTE* const iHighLimit, \
824 const U32 rep[ZSTD_REP_NUM], \
825 U32 const ll0, \
826 U32 const lengthToBeat) \
827 { \
828 return ZSTD_btGetAllMatches_internal( \
829 matches, ms, nextToUpdate3, ip, iHighLimit, \
830 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
831 }
832
833 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
834 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
835 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
836 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
837 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
838
839 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
841 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
842
843 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
844 { \
845 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
846 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
847 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
848 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
849 }
850
851 static ZSTD_getAllMatchesFn
852 ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
853 {
854 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
856 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
857 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
858 };
859 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
860 assert((U32)dictMode < 3);
861 assert(mls - 3 < 4);
862 return getAllMatchesFns[(int)dictMode][mls - 3];
863 }
864
865 /*************************
866 * LDM helper functions *
867 *************************/
868
869 /* Struct containing info needed to make decision about ldm inclusion */
870 typedef struct {
871 rawSeqStore_t seqStore; /* External match candidates store for this block */
872 U32 startPosInBlock; /* Start position of the current match candidate */
873 U32 endPosInBlock; /* End position of the current match candidate */
874 U32 offset; /* Offset of the match candidate */
875 } ZSTD_optLdm_t;
876
877 /* ZSTD_optLdm_skipRawSeqStoreBytes():
878 * Moves forward in @rawSeqStore by @nbBytes,
879 * which will update the fields 'pos' and 'posInSequence'.
880 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
882 {
883 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
885 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
886 if (currPos >= currSeq.litLength + currSeq.matchLength) {
887 currPos -= currSeq.litLength + currSeq.matchLength;
888 rawSeqStore->pos++;
889 } else {
890 rawSeqStore->posInSequence = currPos;
891 break;
892 }
893 }
894 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
895 rawSeqStore->posInSequence = 0;
896 }
897 }
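/* Illustrative example : with seq[pos] = {litLength:10, matchLength:20} and
 * posInSequence == 0, skipping 25 bytes leaves pos unchanged and sets
 * posInSequence = 25 (inside the match part), while skipping 35 bytes
 * consumes the whole sequence (30 bytes), increments pos, and leaves
 * posInSequence = 5 within the next sequence (or 0 if the store is exhausted). */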
898
899 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
900 * Calculates the beginning and end of the next match in the current block.
901 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
902 */
903 static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
905 U32 blockBytesRemaining)
906 {
907 rawSeq currSeq;
908 U32 currBlockEndPos;
909 U32 literalsBytesRemaining;
910 U32 matchBytesRemaining;
911
912 /* Setting match end position to MAX to ensure we never use an LDM during this block */
913 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
914 optLdm->startPosInBlock = UINT_MAX;
915 optLdm->endPosInBlock = UINT_MAX;
916 return;
917 }
918 /* Calculate appropriate bytes left in matchLength and litLength
919 * after adjusting based on ldmSeqStore->posInSequence */
920 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
921 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
922 currBlockEndPos = currPosInBlock + blockBytesRemaining;
923 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
924 currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
925 0;
926 matchBytesRemaining = (literalsBytesRemaining == 0) ?
927 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
928 currSeq.matchLength;
929
930 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
931 if (literalsBytesRemaining >= blockBytesRemaining) {
932 optLdm->startPosInBlock = UINT_MAX;
933 optLdm->endPosInBlock = UINT_MAX;
934 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
935 return;
936 }
937
938 /* Matches may be < MINMATCH by this process. In that case, we will reject them
939 when we are deciding whether or not to add the ldm */
940 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
942 optLdm->offset = currSeq.offset;
943
944 if (optLdm->endPosInBlock > currBlockEndPos) {
945 /* Match ends after the block ends, we can't use the whole match */
946 optLdm->endPosInBlock = currBlockEndPos;
947 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
948 } else {
949 /* Consume nb of bytes equal to size of sequence left */
950 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
951 }
952 }
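/* Illustrative example : at currPosInBlock == 100 with 500 bytes remaining,
 * and a current ldm sequence {litLength:20, matchLength:80} not yet started
 * (posInSequence == 0), the candidate becomes usable over block positions
 * [120, 200) : startPosInBlock = 120, endPosInBlock = 200, and the seq store
 * is advanced past those 100 bytes so the next refresh looks at the
 * following sequence. */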
953
/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match into 'matches' if it's long enough,
 * based on its 'startPosInBlock' and 'endPosInBlock'.
 * Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960 const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
961 {
962 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963 /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
964 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
965
966 /* Ensure that current block position is not outside of the match */
967 if (currPosInBlock < optLdm->startPosInBlock
968 || currPosInBlock >= optLdm->endPosInBlock
969 || candidateMatchLength < MINMATCH) {
970 return;
971 }
972
973 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974 U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976 candidateOffCode, candidateMatchLength, currPosInBlock);
977 matches[*nbMatches].len = candidateMatchLength;
978 matches[*nbMatches].off = candidateOffCode;
979 (*nbMatches)++;
980 }
981 }
982
983 /* ZSTD_optLdm_processMatchCandidate():
984 * Wrapper function to update ldm seq store and call ldm functions as necessary.
985 */
986 static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988 ZSTD_match_t* matches, U32* nbMatches,
989 U32 currPosInBlock, U32 remainingBytes)
990 {
991 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992 return;
993 }
994
995 if (currPosInBlock >= optLdm->endPosInBlock) {
996 if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * beyond 'endPosInBlock'. As such, we need to correct for these "overshoots".
             */
1001 U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1002 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1003 }
1004 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1005 }
1006 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1007 }
1008
1009
1010 /*-*******************************
1011 * Optimal parser
1012 *********************************/
1013
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1015 {
1016 return sol.litlen + sol.mlen;
1017 }
1018
1019 #if 0 /* debug */
1020
1021 static void
1022 listStats(const U32* table, int lastEltID)
1023 {
1024 int const nbElts = lastEltID + 1;
1025 int enb;
1026 for (enb=0; enb < nbElts; enb++) {
1027 (void)table;
1028 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1029 RAWLOG(2, "%4i,", table[enb]);
1030 }
1031 RAWLOG(2, " \n");
1032 }
1033
1034 #endif
1035
1036 FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038 seqStore_t* seqStore,
1039 U32 rep[ZSTD_REP_NUM],
1040 const void* src, size_t srcSize,
1041 const int optLevel,
1042 const ZSTD_dictMode_e dictMode)
1043 {
1044 optState_t* const optStatePtr = &ms->opt;
1045 const BYTE* const istart = (const BYTE*)src;
1046 const BYTE* ip = istart;
1047 const BYTE* anchor = istart;
1048 const BYTE* const iend = istart + srcSize;
1049 const BYTE* const ilimit = iend - 8;
1050 const BYTE* const base = ms->window.base;
1051 const BYTE* const prefixStart = base + ms->window.dictLimit;
1052 const ZSTD_compressionParameters* const cParams = &ms->cParams;
1053
1054 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1055
1056 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1057 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1058 U32 nextToUpdate3 = ms->nextToUpdate;
1059
1060 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061 ZSTD_match_t* const matches = optStatePtr->matchTable;
1062 ZSTD_optimal_t lastSequence;
1063 ZSTD_optLdm_t optLdm;
1064
1065 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1068
1069 /* init */
1070 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1071 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1072 assert(optLevel <= 2);
1073 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1074 ip += (ip==prefixStart);
1075
1076 /* Match Loop */
1077 while (ip < ilimit) {
1078 U32 cur, last_pos = 0;
1079
1080 /* find first match */
1081 { U32 const litlen = (U32)(ip - anchor);
1082 U32 const ll0 = !litlen;
1083 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085 (U32)(ip-istart), (U32)(iend - ip));
1086 if (!nbMatches) { ip++; continue; }
1087
1088 /* initialize opt[0] */
1089 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090 opt[0].mlen = 0; /* means is_a_literal */
1091 opt[0].litlen = litlen;
1092 /* We don't need to include the actual price of the literals because
1093 * it is static for the duration of the forward pass, and is included
1094 * in every price. We include the literal length to avoid negative
1095 * prices when we subtract the previous literal length.
1096 */
1097 opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1098
1099 /* large match -> immediate encoding */
1100 { U32 const maxML = matches[nbMatches-1].len;
1101 U32 const maxOffcode = matches[nbMatches-1].off;
1102 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103 nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1104
1105 if (maxML > sufficient_len) {
1106 lastSequence.litlen = litlen;
1107 lastSequence.mlen = maxML;
1108 lastSequence.off = maxOffcode;
1109 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110 maxML, sufficient_len);
1111 cur = 0;
1112 last_pos = ZSTD_totalLen(lastSequence);
1113 goto _shortestPath;
1114 } }
1115
1116 /* set prices for first matches starting position == 0 */
1117 assert(opt[0].price >= 0);
1118 { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119 U32 pos;
1120 U32 matchNb;
1121 for (pos = 1; pos < minMatch; pos++) {
1122 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1123 }
1124 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125 U32 const offcode = matches[matchNb].off;
1126 U32 const end = matches[matchNb].len;
1127 for ( ; pos <= end ; pos++ ) {
1128 U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129 U32 const sequencePrice = literalsPrice + matchPrice;
1130 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131 pos, ZSTD_fCost(sequencePrice));
1132 opt[pos].mlen = pos;
1133 opt[pos].off = offcode;
1134 opt[pos].litlen = litlen;
1135 opt[pos].price = (int)sequencePrice;
1136 } }
1137 last_pos = pos-1;
1138 }
1139 }
1140
1141 /* check further positions */
1142 for (cur = 1; cur <= last_pos; cur++) {
1143 const BYTE* const inr = ip + cur;
1144 assert(cur < ZSTD_OPT_NUM);
1145 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1146
1147 /* Fix current position with one literal if cheaper */
1148 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149 int const price = opt[cur-1].price
1150 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153 assert(price < 1000000000); /* overflow check */
1154 if (price <= opt[cur].price) {
1155 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158 opt[cur].mlen = 0;
1159 opt[cur].off = 0;
1160 opt[cur].litlen = litlen;
1161 opt[cur].price = price;
1162 } else {
1163 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1166 }
1167 }
1168
1169 /* Set the repcodes of the current position. We must do it here
1170 * because we rely on the repcodes of the 2nd to last sequence being
1171 * correct to set the next chunks repcodes during the backward
1172 * traversal.
1173 */
1174 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1175 assert(cur >= opt[cur].mlen);
1176 if (opt[cur].mlen != 0) {
1177 U32 const prev = cur - opt[cur].mlen;
1178 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180 } else {
1181 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1182 }
1183
            /* last match must start at a minimum distance of 8 from iend */
1185 if (inr > ilimit) continue;
1186
1187 if (cur == last_pos) break;
1188
1189 if ( (optLevel==0) /*static_test*/
1190 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1192 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1193 }
1194
1195 assert(opt[cur].price >= 0);
1196 { U32 const ll0 = (opt[cur].mlen != 0);
1197 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198 U32 const previousPrice = (U32)opt[cur].price;
1199 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1200 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201 U32 matchNb;
1202
1203 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204 (U32)(inr-istart), (U32)(iend-inr));
1205
1206 if (!nbMatches) {
1207 DEBUGLOG(7, "rPos:%u : no match found", cur);
1208 continue;
1209 }
1210
1211 { U32 const maxML = matches[nbMatches-1].len;
1212 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213 inr-istart, cur, nbMatches, maxML);
1214
1215 if ( (maxML > sufficient_len)
1216 || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217 lastSequence.mlen = maxML;
1218 lastSequence.off = matches[nbMatches-1].off;
1219 lastSequence.litlen = litlen;
1220 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221 last_pos = cur + ZSTD_totalLen(lastSequence);
1222 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1223 goto _shortestPath;
1224 } }
1225
1226 /* set prices using matches found at position == cur */
1227 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1228 U32 const offset = matches[matchNb].off;
1229 U32 const lastML = matches[matchNb].len;
1230 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231 U32 mlen;
1232
1233 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234 matchNb, matches[matchNb].off, lastML, litlen);
1235
1236 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1237 U32 const pos = cur + mlen;
1238 int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1239
1240 if ((pos > last_pos) || (price < opt[pos].price)) {
1241 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1244 opt[pos].mlen = mlen;
1245 opt[pos].off = offset;
1246 opt[pos].litlen = litlen;
1247 opt[pos].price = price;
1248 } else {
1249 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1250 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1251 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1252 }
1253 } } }
1254 } /* for (cur = 1; cur <= last_pos; cur++) */
1255
1256 lastSequence = opt[last_pos];
1257 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1258 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1259
1260 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1261 assert(opt[0].mlen == 0);
1262
1263 /* Set the next chunk's repcodes based on the repcodes of the beginning
1264 * of the last match, and the last sequence. This avoids us having to
1265 * update them while traversing the sequences.
1266 */
1267 if (lastSequence.mlen != 0) {
1268 repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269 ZSTD_memcpy(rep, &reps, sizeof(reps));
1270 } else {
1271 ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1272 }
1273
1274 { U32 const storeEnd = cur + 1;
1275 U32 storeStart = storeEnd;
1276 U32 seqPos = cur;
1277
1278 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279 last_pos, cur); (void)last_pos;
1280 assert(storeEnd < ZSTD_OPT_NUM);
1281 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283 opt[storeEnd] = lastSequence;
1284 while (seqPos > 0) {
1285 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1286 storeStart--;
1287 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289 opt[storeStart] = opt[seqPos];
1290 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1291 }
1292
1293 /* save sequences */
1294 DEBUGLOG(6, "sending selected sequences into seqStore")
1295 { U32 storePos;
1296 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297 U32 const llen = opt[storePos].litlen;
1298 U32 const mlen = opt[storePos].mlen;
1299 U32 const offCode = opt[storePos].off;
1300 U32 const advance = llen + mlen;
1301 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302 anchor - istart, (unsigned)llen, (unsigned)mlen);
1303
1304 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305 assert(storePos == storeEnd); /* must be last sequence */
1306 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1307 continue; /* will finish */
1308 }
1309
1310 assert(anchor + llen <= iend);
1311 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1313 anchor += advance;
1314 ip = anchor;
1315 } }
1316 ZSTD_setBasePrices(optStatePtr, optLevel);
1317 }
1318 } /* while (ip < ilimit) */
1319
1320 /* Return the last literals size */
1321 return (size_t)(iend - anchor);
1322 }
1323
static size_t ZSTD_compressBlock_opt0(
1325 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1327 {
1328 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1329 }
1330
static size_t ZSTD_compressBlock_opt2(
1332 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1334 {
1335 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1336 }
1337
size_t ZSTD_compressBlock_btopt(
1339 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340 const void* src, size_t srcSize)
1341 {
1342 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1344 }
1345
1346
1347
1348
1349 /* ZSTD_initStats_ultra():
1350 * make a first compression pass, just to seed stats with more accurate starting values.
1351 * only works on first block, with no dictionary and no ldm.
1352 * this function cannot error, hence its contract must be respected.
1353 */
1354 static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356 seqStore_t* seqStore,
1357 U32 rep[ZSTD_REP_NUM],
1358 const void* src, size_t srcSize)
1359 {
1360 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1361 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1362
1363 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1364 assert(ms->opt.litLengthSum == 0); /* first block */
1365 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1366 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1367 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1368
1369 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1370
1371 /* invalidate first scan from history */
1372 ZSTD_resetSeqStore(seqStore);
1373 ms->window.base -= srcSize;
1374 ms->window.dictLimit += (U32)srcSize;
1375 ms->window.lowLimit = ms->window.dictLimit;
1376 ms->nextToUpdate = ms->window.dictLimit;
1377
1378 }
1379
size_t ZSTD_compressBlock_btultra(
1381 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382 const void* src, size_t srcSize)
1383 {
1384 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1385 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1386 }
1387
size_t ZSTD_compressBlock_btultra2(
1389 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390 const void* src, size_t srcSize)
1391 {
1392 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1394
1395 /* 2-pass strategy:
1396 * this strategy makes a first pass over first block to collect statistics
1397 * and seed next round's statistics with it.
1398 * After 1st pass, function forgets everything, and starts a new block.
1399 * Consequently, this can only work if no data has been previously loaded in tables,
1400 * aka, no dictionary, no prefix, no ldm preprocessing.
1401 * The compression ratio gain is generally small (~0.5% on first block),
1402 * the cost is 2x cpu time on first block. */
1403 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404 if ( (ms->opt.litLengthSum==0) /* first block */
1405 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1406 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1407 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1408 && (srcSize > ZSTD_PREDEF_THRESHOLD)
1409 ) {
1410 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1411 }
1412
1413 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1414 }
1415
size_t ZSTD_compressBlock_btopt_dictMatchState(
1417 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418 const void* src, size_t srcSize)
1419 {
1420 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1421 }
1422
size_t ZSTD_compressBlock_btultra_dictMatchState(
1424 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425 const void* src, size_t srcSize)
1426 {
1427 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1428 }
1429
size_t ZSTD_compressBlock_btopt_extDict(
1431 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432 const void* src, size_t srcSize)
1433 {
1434 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1435 }
1436
size_t ZSTD_compressBlock_btultra_extDict(
1438 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439 const void* src, size_t srcSize)
1440 {
1441 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1442 }
1443
/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is only relevant for the first block (no prefix) */
1447