// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

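/* Fills the long (8-byte) and short (minMatch) hash tables for a CDict.
 * Note: this variant stores "tagged" entries: hBitsL/hBitsS are widened by
 * ZSTD_SHORT_CACHE_TAG_BITS, so each hash value carries a small tag in its
 * low bits, and ZSTD_writeTaggedIndex() stores the position together with
 * that tag. A later lookup can then compare tags first (see
 * ZSTD_comparePackedTags() in the dictMatchState search below) and filter
 * out most false candidates without dereferencing dictionary bytes. */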
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

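/* Same table fill for a regular compression context: identical walk over the
 * input, but the tables are indexed at the plain hashLog/chainLog widths and
 * store raw (untagged) position indices. */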
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32 const hBitsL = cParams->hashLog;
    U32 const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


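/* Double-fast search, no dictionary: each position is probed in two tables,
 * a "long" table hashing 8 bytes and a "short" table hashing mls bytes.
 * The repcode at ip+1 is checked first; otherwise a long match is preferred,
 * and a short match triggers a look for a longer match at ip+1 before being
 * accepted. When kStepIncr positions go by without a match, the stride
 * between probed positions grows, so incompressible data is skipped faster. */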
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */
    const BYTE* matchs0_safe; /* matchs0 or safe address */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */
    /* Array of ~random data, should have low probability of matching data.
     * We load from here instead of from the tables, if matchl0/matchl1 are
     * invalid indices. Used to avoid unpredictable branches. */
    const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
             * However, the expression below compiles into a conditional move.
             * Since a match is unlikely, and we only *branch* on
             * idxl0 > prefixLowestIndex when there is a match, all branches
             * become predictable. */
            {   const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);

                /* check prefix long match */
                if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
            }   }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            /* Same optimization as matchl0 above */
            matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);

            /* check prefix short match */
            if (MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
                goto _search_next_long;
            }

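            /* No match at this position: advance. Once ip1 reaches nextStep
             * (kStepIncr positions searched without a match), prefetch ahead
             * and widen the stride, trading match density for speed on
             * poorly-compressible regions. */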
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

    _cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

    _search_next_long:

        /* short match found: let's check for a longer one */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);

        /* check long match at +1 position */
        if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
            size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
            if (l1len > mLength) {
                /* use the long match instead */
                ip = ip1;
                mLength = l1len;
                offset = (U32)(ip-matchl1);
                matchs0 = matchl1;
            }
        }

        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */

        /* fall-through */

    _match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1 is
             * greater than or equal to the new ip we will have after we're done
             * processing this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler,
             * more predictable test. The minimum match length is 4 bytes even
             * when we take a short match, so as long as step, the (initial)
             * distance between ip and ip1, is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

    _match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}


FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

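    /* The attached CDict's tables are probed for nearly every position below,
     * so optionally prefetch them whole up front to warm the cache. */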
    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
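        /* The dict tables hold tagged entries: the position index sits in the
         * upper bits, a short hash tag in the low ZSTD_SHORT_CACHE_TAG_BITS
         * bits. Comparing tags first filters out most false candidates
         * without touching dictionary memory. */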
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
            /* check prefix long match */
            mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
            offset = (U32)(ip-matchLong);
            while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
            goto _match_found;
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* short match candidate */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

    _search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
                mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                ip++;
                offset = (U32)(ip-matchL3);
                while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                goto _match_found;
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

    _match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

    _match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(     \
            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
            void const* src, size_t srcSize)                            \
    {                                                                   \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }
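/* Each instantiation bakes mls in as a compile-time constant, so
 * ZSTD_hashPtr() specializes per match length instead of branching on it
 * inside the hot loop. */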

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


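/* extDict variant: match candidates may live either in the current prefix or
 * in the "external" segment left over from a previous window cycle, so every
 * index is mapped to the appropriate base pointer, and match lengths are
 * measured across the segment boundary with ZSTD_count_2segments(). */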
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32 const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32 const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}

#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */