1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /*
3 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 #include "zstd_compress_internal.h"
13 #include "zstd_lazy.h"
14
15
16 /*-*************************************
17 * Binary Tree search
18 ***************************************/
19
20 static void
ZSTD_updateDUBT(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend,U32 mls)21 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
22 const BYTE* ip, const BYTE* iend,
23 U32 mls)
24 {
25 const ZSTD_compressionParameters* const cParams = &ms->cParams;
26 U32* const hashTable = ms->hashTable;
27 U32 const hashLog = cParams->hashLog;
28
29 U32* const bt = ms->chainTable;
30 U32 const btLog = cParams->chainLog - 1;
31 U32 const btMask = (1 << btLog) - 1;
32
33 const BYTE* const base = ms->window.base;
34 U32 const target = (U32)(ip - base);
35 U32 idx = ms->nextToUpdate;
36
37 if (idx != target)
38 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
39 idx, target, ms->window.dictLimit);
40 assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
41 (void)iend;
42
43 assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
44 for ( ; idx < target ; idx++) {
45 size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
46 U32 const matchIndex = hashTable[h];
47
48 U32* const nextCandidatePtr = bt + 2*(idx&btMask);
49 U32* const sortMarkPtr = nextCandidatePtr + 1;
50
51 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
52 hashTable[h] = idx; /* Update Hash Table */
53 *nextCandidatePtr = matchIndex; /* update BT like a chain */
54 *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
55 }
56 ms->nextToUpdate = target;
57 }
58
59
60 /** ZSTD_insertDUBT1() :
61 * sort one already inserted but unsorted position
62 * assumption : current >= btlow == (current - btmask)
63 * doesn't fail */
64 static void
ZSTD_insertDUBT1(ZSTD_matchState_t * ms,U32 current,const BYTE * inputEnd,U32 nbCompares,U32 btLow,const ZSTD_dictMode_e dictMode)65 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
66 U32 current, const BYTE* inputEnd,
67 U32 nbCompares, U32 btLow,
68 const ZSTD_dictMode_e dictMode)
69 {
70 const ZSTD_compressionParameters* const cParams = &ms->cParams;
71 U32* const bt = ms->chainTable;
72 U32 const btLog = cParams->chainLog - 1;
73 U32 const btMask = (1 << btLog) - 1;
74 size_t commonLengthSmaller=0, commonLengthLarger=0;
75 const BYTE* const base = ms->window.base;
76 const BYTE* const dictBase = ms->window.dictBase;
77 const U32 dictLimit = ms->window.dictLimit;
78 const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
79 const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
80 const BYTE* const dictEnd = dictBase + dictLimit;
81 const BYTE* const prefixStart = base + dictLimit;
82 const BYTE* match;
83 U32* smallerPtr = bt + 2*(current&btMask);
84 U32* largerPtr = smallerPtr + 1;
85 U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
86 U32 dummy32; /* to be nullified at the end */
87 U32 const windowValid = ms->window.lowLimit;
88 U32 const maxDistance = 1U << cParams->windowLog;
89 U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
90
91
92 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
93 current, dictLimit, windowLow);
94 assert(current >= btLow);
95 assert(ip < iend); /* condition for ZSTD_count */
96
97 while (nbCompares-- && (matchIndex > windowLow)) {
98 U32* const nextPtr = bt + 2*(matchIndex & btMask);
99 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
100 assert(matchIndex < current);
101 /* note : all candidates are now supposed sorted,
102 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
103 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
104
105 if ( (dictMode != ZSTD_extDict)
106 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
107 || (current < dictLimit) /* both in extDict */) {
108 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
109 || (matchIndex+matchLength >= dictLimit)) ?
110 base : dictBase;
111 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
112 || (current < dictLimit) );
113 match = mBase + matchIndex;
114 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
115 } else {
116 match = dictBase + matchIndex;
117 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
118 if (matchIndex+matchLength >= dictLimit)
119 match = base + matchIndex; /* preparation for next read of match[matchLength] */
120 }
121
122 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
123 current, matchIndex, (U32)matchLength);
124
125 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
126 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
127 }
128
129 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
130 /* match is smaller than current */
131 *smallerPtr = matchIndex; /* update smaller idx */
132 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
133 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
134 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
135 matchIndex, btLow, nextPtr[1]);
136 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
137 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
138 } else {
139 /* match is larger than current */
140 *largerPtr = matchIndex;
141 commonLengthLarger = matchLength;
142 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
143 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
144 matchIndex, btLow, nextPtr[0]);
145 largerPtr = nextPtr;
146 matchIndex = nextPtr[0];
147 } }
148
149 *smallerPtr = *largerPtr = 0;
150 }
151
152
153 static size_t
ZSTD_DUBT_findBetterDictMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,size_t bestLength,U32 nbCompares,U32 const mls,const ZSTD_dictMode_e dictMode)154 ZSTD_DUBT_findBetterDictMatch (
155 ZSTD_matchState_t* ms,
156 const BYTE* const ip, const BYTE* const iend,
157 size_t* offsetPtr,
158 size_t bestLength,
159 U32 nbCompares,
160 U32 const mls,
161 const ZSTD_dictMode_e dictMode)
162 {
163 const ZSTD_matchState_t * const dms = ms->dictMatchState;
164 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
165 const U32 * const dictHashTable = dms->hashTable;
166 U32 const hashLog = dmsCParams->hashLog;
167 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
168 U32 dictMatchIndex = dictHashTable[h];
169
170 const BYTE* const base = ms->window.base;
171 const BYTE* const prefixStart = base + ms->window.dictLimit;
172 U32 const current = (U32)(ip-base);
173 const BYTE* const dictBase = dms->window.base;
174 const BYTE* const dictEnd = dms->window.nextSrc;
175 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
176 U32 const dictLowLimit = dms->window.lowLimit;
177 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
178
179 U32* const dictBt = dms->chainTable;
180 U32 const btLog = dmsCParams->chainLog - 1;
181 U32 const btMask = (1 << btLog) - 1;
182 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
183
184 size_t commonLengthSmaller=0, commonLengthLarger=0;
185
186 (void)dictMode;
187 assert(dictMode == ZSTD_dictMatchState);
188
189 while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
190 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
191 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
192 const BYTE* match = dictBase + dictMatchIndex;
193 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
194 if (dictMatchIndex+matchLength >= dictHighLimit)
195 match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
196
197 if (matchLength > bestLength) {
198 U32 matchIndex = dictMatchIndex + dictIndexDelta;
199 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
200 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
201 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
202 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
203 }
204 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
205 break; /* drop, to guarantee consistency (miss a little bit of compression) */
206 }
207 }
208
209 if (match[matchLength] < ip[matchLength]) {
210 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
211 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
212 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
213 } else {
214 /* match is larger than current */
215 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
216 commonLengthLarger = matchLength;
217 dictMatchIndex = nextPtr[0];
218 }
219 }
220
221 if (bestLength >= MINMATCH) {
222 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
223 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
224 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
225 }
226 return bestLength;
227
228 }
229
230
231 static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,U32 const mls,const ZSTD_dictMode_e dictMode)232 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
233 const BYTE* const ip, const BYTE* const iend,
234 size_t* offsetPtr,
235 U32 const mls,
236 const ZSTD_dictMode_e dictMode)
237 {
238 const ZSTD_compressionParameters* const cParams = &ms->cParams;
239 U32* const hashTable = ms->hashTable;
240 U32 const hashLog = cParams->hashLog;
241 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
242 U32 matchIndex = hashTable[h];
243
244 const BYTE* const base = ms->window.base;
245 U32 const current = (U32)(ip-base);
246 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
247
248 U32* const bt = ms->chainTable;
249 U32 const btLog = cParams->chainLog - 1;
250 U32 const btMask = (1 << btLog) - 1;
251 U32 const btLow = (btMask >= current) ? 0 : current - btMask;
252 U32 const unsortLimit = MAX(btLow, windowLow);
253
254 U32* nextCandidate = bt + 2*(matchIndex&btMask);
255 U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
256 U32 nbCompares = 1U << cParams->searchLog;
257 U32 nbCandidates = nbCompares;
258 U32 previousCandidate = 0;
259
260 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
261 assert(ip <= iend-8); /* required for h calculation */
262
263 /* reach end of unsorted candidates list */
264 while ( (matchIndex > unsortLimit)
265 && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
266 && (nbCandidates > 1) ) {
267 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
268 matchIndex);
269 *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
270 previousCandidate = matchIndex;
271 matchIndex = *nextCandidate;
272 nextCandidate = bt + 2*(matchIndex&btMask);
273 unsortedMark = bt + 2*(matchIndex&btMask) + 1;
274 nbCandidates --;
275 }
276
277 /* nullify last candidate if it's still unsorted
278 * simplification, detrimental to compression ratio, beneficial for speed */
279 if ( (matchIndex > unsortLimit)
280 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
281 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
282 matchIndex);
283 *nextCandidate = *unsortedMark = 0;
284 }
285
286 /* batch sort stacked candidates */
287 matchIndex = previousCandidate;
288 while (matchIndex) { /* will end on matchIndex == 0 */
289 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
290 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
291 ZSTD_insertDUBT1(ms, matchIndex, iend,
292 nbCandidates, unsortLimit, dictMode);
293 matchIndex = nextCandidateIdx;
294 nbCandidates++;
295 }
296
297 /* find longest match */
298 { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
299 const BYTE* const dictBase = ms->window.dictBase;
300 const U32 dictLimit = ms->window.dictLimit;
301 const BYTE* const dictEnd = dictBase + dictLimit;
302 const BYTE* const prefixStart = base + dictLimit;
303 U32* smallerPtr = bt + 2*(current&btMask);
304 U32* largerPtr = bt + 2*(current&btMask) + 1;
305 U32 matchEndIdx = current + 8 + 1;
306 U32 dummy32; /* to be nullified at the end */
307 size_t bestLength = 0;
308
309 matchIndex = hashTable[h];
310 hashTable[h] = current; /* Update Hash Table */
311
312 while (nbCompares-- && (matchIndex > windowLow)) {
313 U32* const nextPtr = bt + 2*(matchIndex & btMask);
314 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
315 const BYTE* match;
316
317 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
318 match = base + matchIndex;
319 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
320 } else {
321 match = dictBase + matchIndex;
322 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
323 if (matchIndex+matchLength >= dictLimit)
324 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
325 }
326
327 if (matchLength > bestLength) {
328 if (matchLength > matchEndIdx - matchIndex)
329 matchEndIdx = matchIndex + (U32)matchLength;
330 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
332 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
333 if (dictMode == ZSTD_dictMatchState) {
334 nbCompares = 0; /* in addition to avoiding checking any
335 * further in this loop, make sure we
336 * skip checking in the dictionary. */
337 }
338 break; /* drop, to guarantee consistency (miss a little bit of compression) */
339 }
340 }
341
342 if (match[matchLength] < ip[matchLength]) {
343 /* match is smaller than current */
344 *smallerPtr = matchIndex; /* update smaller idx */
345 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
346 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
347 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
348 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
349 } else {
350 /* match is larger than current */
351 *largerPtr = matchIndex;
352 commonLengthLarger = matchLength;
353 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
354 largerPtr = nextPtr;
355 matchIndex = nextPtr[0];
356 } }
357
358 *smallerPtr = *largerPtr = 0;
359
360 if (dictMode == ZSTD_dictMatchState && nbCompares) {
361 bestLength = ZSTD_DUBT_findBetterDictMatch(
362 ms, ip, iend,
363 offsetPtr, bestLength, nbCompares,
364 mls, dictMode);
365 }
366
367 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
368 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
369 if (bestLength >= MINMATCH) {
370 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
371 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
372 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
373 }
374 return bestLength;
375 }
376 }
377
378
379 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
380 FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)381 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
382 const BYTE* const ip, const BYTE* const iLimit,
383 size_t* offsetPtr,
384 const U32 mls /* template */,
385 const ZSTD_dictMode_e dictMode)
386 {
387 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
388 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
389 ZSTD_updateDUBT(ms, ip, iLimit, mls);
390 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
391 }
392
393
394 static size_t
ZSTD_BtFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)395 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
396 const BYTE* ip, const BYTE* const iLimit,
397 size_t* offsetPtr)
398 {
399 switch(ms->cParams.minMatch)
400 {
401 default : /* includes case 3 */
402 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
403 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
404 case 7 :
405 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
406 }
407 }
408
409
ZSTD_BtFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)410 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
411 ZSTD_matchState_t* ms,
412 const BYTE* ip, const BYTE* const iLimit,
413 size_t* offsetPtr)
414 {
415 switch(ms->cParams.minMatch)
416 {
417 default : /* includes case 3 */
418 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
419 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
420 case 7 :
421 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
422 }
423 }
424
425
ZSTD_BtFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)426 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
427 ZSTD_matchState_t* ms,
428 const BYTE* ip, const BYTE* const iLimit,
429 size_t* offsetPtr)
430 {
431 switch(ms->cParams.minMatch)
432 {
433 default : /* includes case 3 */
434 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
435 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
436 case 7 :
437 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
438 }
439 }
440
441
442
443 /* *********************************
444 * Hash Chain
445 ***********************************/
446 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
447
448 /* Update chains up to ip (excluded)
449 Assumption : always within prefix (i.e. not within extDict) */
ZSTD_insertAndFindFirstIndex_internal(ZSTD_matchState_t * ms,const ZSTD_compressionParameters * const cParams,const BYTE * ip,U32 const mls)450 static U32 ZSTD_insertAndFindFirstIndex_internal(
451 ZSTD_matchState_t* ms,
452 const ZSTD_compressionParameters* const cParams,
453 const BYTE* ip, U32 const mls)
454 {
455 U32* const hashTable = ms->hashTable;
456 const U32 hashLog = cParams->hashLog;
457 U32* const chainTable = ms->chainTable;
458 const U32 chainMask = (1 << cParams->chainLog) - 1;
459 const BYTE* const base = ms->window.base;
460 const U32 target = (U32)(ip - base);
461 U32 idx = ms->nextToUpdate;
462
463 while(idx < target) { /* catch up */
464 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
465 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
466 hashTable[h] = idx;
467 idx++;
468 }
469
470 ms->nextToUpdate = target;
471 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
472 }
473
ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t * ms,const BYTE * ip)474 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
475 const ZSTD_compressionParameters* const cParams = &ms->cParams;
476 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
477 }
478
479
480 /* inlining is important to hardwire a hot branch (template emulation) */
481 FORCE_INLINE_TEMPLATE
ZSTD_HcFindBestMatch_generic(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)482 size_t ZSTD_HcFindBestMatch_generic (
483 ZSTD_matchState_t* ms,
484 const BYTE* const ip, const BYTE* const iLimit,
485 size_t* offsetPtr,
486 const U32 mls, const ZSTD_dictMode_e dictMode)
487 {
488 const ZSTD_compressionParameters* const cParams = &ms->cParams;
489 U32* const chainTable = ms->chainTable;
490 const U32 chainSize = (1 << cParams->chainLog);
491 const U32 chainMask = chainSize-1;
492 const BYTE* const base = ms->window.base;
493 const BYTE* const dictBase = ms->window.dictBase;
494 const U32 dictLimit = ms->window.dictLimit;
495 const BYTE* const prefixStart = base + dictLimit;
496 const BYTE* const dictEnd = dictBase + dictLimit;
497 const U32 current = (U32)(ip-base);
498 const U32 maxDistance = 1U << cParams->windowLog;
499 const U32 lowestValid = ms->window.lowLimit;
500 const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
501 const U32 isDictionary = (ms->loadedDictEnd != 0);
502 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
503 const U32 minChain = current > chainSize ? current - chainSize : 0;
504 U32 nbAttempts = 1U << cParams->searchLog;
505 size_t ml=4-1;
506
507 /* HC4 match finder */
508 U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
509
510 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
511 size_t currentMl=0;
512 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
513 const BYTE* const match = base + matchIndex;
514 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
515 if (match[ml] == ip[ml]) /* potentially better */
516 currentMl = ZSTD_count(ip, match, iLimit);
517 } else {
518 const BYTE* const match = dictBase + matchIndex;
519 assert(match+4 <= dictEnd);
520 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
521 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
522 }
523
524 /* save best solution */
525 if (currentMl > ml) {
526 ml = currentMl;
527 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
528 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
529 }
530
531 if (matchIndex <= minChain) break;
532 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
533 }
534
535 if (dictMode == ZSTD_dictMatchState) {
536 const ZSTD_matchState_t* const dms = ms->dictMatchState;
537 const U32* const dmsChainTable = dms->chainTable;
538 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
539 const U32 dmsChainMask = dmsChainSize - 1;
540 const U32 dmsLowestIndex = dms->window.dictLimit;
541 const BYTE* const dmsBase = dms->window.base;
542 const BYTE* const dmsEnd = dms->window.nextSrc;
543 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
544 const U32 dmsIndexDelta = dictLimit - dmsSize;
545 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
546
547 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
548
549 for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
550 size_t currentMl=0;
551 const BYTE* const match = dmsBase + matchIndex;
552 assert(match+4 <= dmsEnd);
553 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
554 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
555
556 /* save best solution */
557 if (currentMl > ml) {
558 ml = currentMl;
559 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
560 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
561 }
562
563 if (matchIndex <= dmsMinChain) break;
564 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
565 }
566 }
567
568 return ml;
569 }
570
571
ZSTD_HcFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)572 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
573 ZSTD_matchState_t* ms,
574 const BYTE* ip, const BYTE* const iLimit,
575 size_t* offsetPtr)
576 {
577 switch(ms->cParams.minMatch)
578 {
579 default : /* includes case 3 */
580 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
581 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
582 case 7 :
583 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
584 }
585 }
586
587
ZSTD_HcFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)588 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
589 ZSTD_matchState_t* ms,
590 const BYTE* ip, const BYTE* const iLimit,
591 size_t* offsetPtr)
592 {
593 switch(ms->cParams.minMatch)
594 {
595 default : /* includes case 3 */
596 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
597 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
598 case 7 :
599 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
600 }
601 }
602
603
ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)604 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
605 ZSTD_matchState_t* ms,
606 const BYTE* ip, const BYTE* const iLimit,
607 size_t* offsetPtr)
608 {
609 switch(ms->cParams.minMatch)
610 {
611 default : /* includes case 3 */
612 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
613 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
614 case 7 :
615 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
616 }
617 }
618
619
620 /* *******************************
621 * Common parser - lazy strategy
622 *********************************/
623 typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
624
625 FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth,ZSTD_dictMode_e const dictMode)626 ZSTD_compressBlock_lazy_generic(
627 ZSTD_matchState_t* ms, seqStore_t* seqStore,
628 U32 rep[ZSTD_REP_NUM],
629 const void* src, size_t srcSize,
630 const searchMethod_e searchMethod, const U32 depth,
631 ZSTD_dictMode_e const dictMode)
632 {
633 const BYTE* const istart = (const BYTE*)src;
634 const BYTE* ip = istart;
635 const BYTE* anchor = istart;
636 const BYTE* const iend = istart + srcSize;
637 const BYTE* const ilimit = iend - 8;
638 const BYTE* const base = ms->window.base;
639 const U32 prefixLowestIndex = ms->window.dictLimit;
640 const BYTE* const prefixLowest = base + prefixLowestIndex;
641
642 typedef size_t (*searchMax_f)(
643 ZSTD_matchState_t* ms,
644 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
645 searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
646 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
647 : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
648 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
649 : ZSTD_HcFindBestMatch_selectMLS);
650 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
651
652 const ZSTD_matchState_t* const dms = ms->dictMatchState;
653 const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
654 dms->window.dictLimit : 0;
655 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
656 dms->window.base : NULL;
657 const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
658 dictBase + dictLowestIndex : NULL;
659 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
660 dms->window.nextSrc : NULL;
661 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
662 prefixLowestIndex - (U32)(dictEnd - dictBase) :
663 0;
664 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
665
666 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
667
668 /* init */
669 ip += (dictAndPrefixLength == 0);
670 if (dictMode == ZSTD_noDict) {
671 U32 const current = (U32)(ip - base);
672 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
673 U32 const maxRep = current - windowLow;
674 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
675 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
676 }
677 if (dictMode == ZSTD_dictMatchState) {
678 /* dictMatchState repCode checks don't currently handle repCode == 0
679 * disabling. */
680 assert(offset_1 <= dictAndPrefixLength);
681 assert(offset_2 <= dictAndPrefixLength);
682 }
683
684 /* Match Loop */
685 #if defined(__GNUC__) && defined(__x86_64__)
686 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
687 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
688 */
689 __asm__(".p2align 5");
690 #endif
691 while (ip < ilimit) {
692 size_t matchLength=0;
693 size_t offset=0;
694 const BYTE* start=ip+1;
695
696 /* check repCode */
697 if (dictMode == ZSTD_dictMatchState) {
698 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
699 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
700 && repIndex < prefixLowestIndex) ?
701 dictBase + (repIndex - dictIndexDelta) :
702 base + repIndex;
703 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
704 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
705 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
706 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
707 if (depth==0) goto _storeSequence;
708 }
709 }
710 if ( dictMode == ZSTD_noDict
711 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
712 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
713 if (depth==0) goto _storeSequence;
714 }
715
716 /* first search (depth 0) */
717 { size_t offsetFound = 999999999;
718 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
719 if (ml2 > matchLength)
720 matchLength = ml2, start = ip, offset=offsetFound;
721 }
722
723 if (matchLength < 4) {
724 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
725 continue;
726 }
727
728 /* let's try to find a better solution */
729 if (depth>=1)
730 while (ip<ilimit) {
731 ip ++;
732 if ( (dictMode == ZSTD_noDict)
733 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
734 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
735 int const gain2 = (int)(mlRep * 3);
736 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
737 if ((mlRep >= 4) && (gain2 > gain1))
738 matchLength = mlRep, offset = 0, start = ip;
739 }
740 if (dictMode == ZSTD_dictMatchState) {
741 const U32 repIndex = (U32)(ip - base) - offset_1;
742 const BYTE* repMatch = repIndex < prefixLowestIndex ?
743 dictBase + (repIndex - dictIndexDelta) :
744 base + repIndex;
745 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
746 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
747 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
748 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
749 int const gain2 = (int)(mlRep * 3);
750 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
751 if ((mlRep >= 4) && (gain2 > gain1))
752 matchLength = mlRep, offset = 0, start = ip;
753 }
754 }
755 { size_t offset2=999999999;
756 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
757 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
758 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
759 if ((ml2 >= 4) && (gain2 > gain1)) {
760 matchLength = ml2, offset = offset2, start = ip;
761 continue; /* search a better one */
762 } }
763
764 /* let's find an even better one */
765 if ((depth==2) && (ip<ilimit)) {
766 ip ++;
767 if ( (dictMode == ZSTD_noDict)
768 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
769 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
770 int const gain2 = (int)(mlRep * 4);
771 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
772 if ((mlRep >= 4) && (gain2 > gain1))
773 matchLength = mlRep, offset = 0, start = ip;
774 }
775 if (dictMode == ZSTD_dictMatchState) {
776 const U32 repIndex = (U32)(ip - base) - offset_1;
777 const BYTE* repMatch = repIndex < prefixLowestIndex ?
778 dictBase + (repIndex - dictIndexDelta) :
779 base + repIndex;
780 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
781 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
782 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
783 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
784 int const gain2 = (int)(mlRep * 4);
785 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
786 if ((mlRep >= 4) && (gain2 > gain1))
787 matchLength = mlRep, offset = 0, start = ip;
788 }
789 }
790 { size_t offset2=999999999;
791 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
792 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
793 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
794 if ((ml2 >= 4) && (gain2 > gain1)) {
795 matchLength = ml2, offset = offset2, start = ip;
796 continue;
797 } } }
798 break; /* nothing found : store previous solution */
799 }
800
801 /* NOTE:
802 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
803 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
804 * overflows the pointer, which is undefined behavior.
805 */
806 /* catch up */
807 if (offset) {
808 if (dictMode == ZSTD_noDict) {
809 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
810 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
811 { start--; matchLength++; }
812 }
813 if (dictMode == ZSTD_dictMatchState) {
814 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
815 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
816 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
817 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
818 }
819 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
820 }
821 /* store sequence */
822 _storeSequence:
823 { size_t const litLength = start - anchor;
824 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
825 anchor = ip = start + matchLength;
826 }
827
828 /* check immediate repcode */
829 if (dictMode == ZSTD_dictMatchState) {
830 while (ip <= ilimit) {
831 U32 const current2 = (U32)(ip-base);
832 U32 const repIndex = current2 - offset_2;
833 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
834 && repIndex < prefixLowestIndex ?
835 dictBase - dictIndexDelta + repIndex :
836 base + repIndex;
837 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
838 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
839 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
840 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
841 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
842 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
843 ip += matchLength;
844 anchor = ip;
845 continue;
846 }
847 break;
848 }
849 }
850
851 if (dictMode == ZSTD_noDict) {
852 while ( ((ip <= ilimit) & (offset_2>0))
853 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
854 /* store sequence */
855 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
856 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
857 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
858 ip += matchLength;
859 anchor = ip;
860 continue; /* faster when present ... (?) */
861 } } }
862
863 /* Save reps for next block */
864 rep[0] = offset_1 ? offset_1 : savedOffset;
865 rep[1] = offset_2 ? offset_2 : savedOffset;
866
867 /* Return the last literals size */
868 return (size_t)(iend - anchor);
869 }
870
871
ZSTD_compressBlock_btlazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)872 size_t ZSTD_compressBlock_btlazy2(
873 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
874 void const* src, size_t srcSize)
875 {
876 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
877 }
878
ZSTD_compressBlock_lazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)879 size_t ZSTD_compressBlock_lazy2(
880 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
881 void const* src, size_t srcSize)
882 {
883 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
884 }
885
ZSTD_compressBlock_lazy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)886 size_t ZSTD_compressBlock_lazy(
887 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
888 void const* src, size_t srcSize)
889 {
890 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
891 }
892
ZSTD_compressBlock_greedy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)893 size_t ZSTD_compressBlock_greedy(
894 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
895 void const* src, size_t srcSize)
896 {
897 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
898 }
899
ZSTD_compressBlock_btlazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)900 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
901 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
902 void const* src, size_t srcSize)
903 {
904 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
905 }
906
ZSTD_compressBlock_lazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)907 size_t ZSTD_compressBlock_lazy2_dictMatchState(
908 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
909 void const* src, size_t srcSize)
910 {
911 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
912 }
913
ZSTD_compressBlock_lazy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)914 size_t ZSTD_compressBlock_lazy_dictMatchState(
915 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
916 void const* src, size_t srcSize)
917 {
918 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
919 }
920
ZSTD_compressBlock_greedy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)921 size_t ZSTD_compressBlock_greedy_dictMatchState(
922 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
923 void const* src, size_t srcSize)
924 {
925 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
926 }
927
928
929 FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_lazy_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth)930 size_t ZSTD_compressBlock_lazy_extDict_generic(
931 ZSTD_matchState_t* ms, seqStore_t* seqStore,
932 U32 rep[ZSTD_REP_NUM],
933 const void* src, size_t srcSize,
934 const searchMethod_e searchMethod, const U32 depth)
935 {
936 const BYTE* const istart = (const BYTE*)src;
937 const BYTE* ip = istart;
938 const BYTE* anchor = istart;
939 const BYTE* const iend = istart + srcSize;
940 const BYTE* const ilimit = iend - 8;
941 const BYTE* const base = ms->window.base;
942 const U32 dictLimit = ms->window.dictLimit;
943 const BYTE* const prefixStart = base + dictLimit;
944 const BYTE* const dictBase = ms->window.dictBase;
945 const BYTE* const dictEnd = dictBase + dictLimit;
946 const BYTE* const dictStart = dictBase + ms->window.lowLimit;
947 const U32 windowLog = ms->cParams.windowLog;
948
949 typedef size_t (*searchMax_f)(
950 ZSTD_matchState_t* ms,
951 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
952 searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
953
954 U32 offset_1 = rep[0], offset_2 = rep[1];
955
956 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
957
958 /* init */
959 ip += (ip == prefixStart);
960
961 /* Match Loop */
962 #if defined(__GNUC__) && defined(__x86_64__)
963 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
964 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
965 */
966 __asm__(".p2align 5");
967 #endif
968 while (ip < ilimit) {
969 size_t matchLength=0;
970 size_t offset=0;
971 const BYTE* start=ip+1;
972 U32 current = (U32)(ip-base);
973
974 /* check repCode */
975 { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
976 const U32 repIndex = (U32)(current+1 - offset_1);
977 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
978 const BYTE* const repMatch = repBase + repIndex;
979 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
980 & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
981 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
982 /* repcode detected we should take it */
983 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
984 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
985 if (depth==0) goto _storeSequence;
986 } }
987
988 /* first search (depth 0) */
989 { size_t offsetFound = 999999999;
990 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
991 if (ml2 > matchLength)
992 matchLength = ml2, start = ip, offset=offsetFound;
993 }
994
995 if (matchLength < 4) {
996 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
997 continue;
998 }
999
1000 /* let's try to find a better solution */
1001 if (depth>=1)
1002 while (ip<ilimit) {
1003 ip ++;
1004 current++;
1005 /* check repCode */
1006 if (offset) {
1007 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1008 const U32 repIndex = (U32)(current - offset_1);
1009 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1010 const BYTE* const repMatch = repBase + repIndex;
1011 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
1012 & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1013 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1014 /* repcode detected */
1015 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1016 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1017 int const gain2 = (int)(repLength * 3);
1018 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1019 if ((repLength >= 4) && (gain2 > gain1))
1020 matchLength = repLength, offset = 0, start = ip;
1021 } }
1022
1023 /* search match, depth 1 */
1024 { size_t offset2=999999999;
1025 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1026 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1027 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1028 if ((ml2 >= 4) && (gain2 > gain1)) {
1029 matchLength = ml2, offset = offset2, start = ip;
1030 continue; /* search a better one */
1031 } }
1032
1033 /* let's find an even better one */
1034 if ((depth==2) && (ip<ilimit)) {
1035 ip ++;
1036 current++;
1037 /* check repCode */
1038 if (offset) {
1039 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1040 const U32 repIndex = (U32)(current - offset_1);
1041 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1042 const BYTE* const repMatch = repBase + repIndex;
1043 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
1044 & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
1045 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1046 /* repcode detected */
1047 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1048 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1049 int const gain2 = (int)(repLength * 4);
1050 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1051 if ((repLength >= 4) && (gain2 > gain1))
1052 matchLength = repLength, offset = 0, start = ip;
1053 } }
1054
1055 /* search match, depth 2 */
1056 { size_t offset2=999999999;
1057 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1058 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1059 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1060 if ((ml2 >= 4) && (gain2 > gain1)) {
1061 matchLength = ml2, offset = offset2, start = ip;
1062 continue;
1063 } } }
1064 break; /* nothing found : store previous solution */
1065 }
1066
1067 /* catch up */
1068 if (offset) {
1069 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1070 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1071 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1072 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1073 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1074 }
1075
1076 /* store sequence */
1077 _storeSequence:
1078 { size_t const litLength = start - anchor;
1079 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1080 anchor = ip = start + matchLength;
1081 }
1082
1083 /* check immediate repcode */
1084 while (ip <= ilimit) {
1085 const U32 repCurrent = (U32)(ip-base);
1086 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1087 const U32 repIndex = repCurrent - offset_2;
1088 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1089 const BYTE* const repMatch = repBase + repIndex;
1090 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
1091 & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1092 if (MEM_read32(ip) == MEM_read32(repMatch)) {
1093 /* repcode detected we should take it */
1094 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1095 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1096 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1097 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1098 ip += matchLength;
1099 anchor = ip;
1100 continue; /* faster when present ... (?) */
1101 }
1102 break;
1103 } }
1104
1105 /* Save reps for next block */
1106 rep[0] = offset_1;
1107 rep[1] = offset_2;
1108
1109 /* Return the last literals size */
1110 return (size_t)(iend - anchor);
1111 }
1112
1113
ZSTD_compressBlock_greedy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1114 size_t ZSTD_compressBlock_greedy_extDict(
1115 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1116 void const* src, size_t srcSize)
1117 {
1118 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1119 }
1120
ZSTD_compressBlock_lazy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1121 size_t ZSTD_compressBlock_lazy_extDict(
1122 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1123 void const* src, size_t srcSize)
1124
1125 {
1126 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1127 }
1128
ZSTD_compressBlock_lazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1129 size_t ZSTD_compressBlock_lazy2_extDict(
1130 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1131 void const* src, size_t srcSize)
1132
1133 {
1134 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1135 }
1136
ZSTD_compressBlock_btlazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1137 size_t ZSTD_compressBlock_btlazy2_extDict(
1138 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1139 void const* src, size_t srcSize)
1140
1141 {
1142 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1143 }
1144