10c16b537SWarner Losh /*
2*5ff13fbcSAllan Jude * Copyright (c) Yann Collet, Facebook, Inc.
30c16b537SWarner Losh * All rights reserved.
40c16b537SWarner Losh *
50c16b537SWarner Losh * This source code is licensed under both the BSD-style license (found in the
60c16b537SWarner Losh * LICENSE file in the root directory of this source tree) and the GPLv2 (found
70c16b537SWarner Losh * in the COPYING file in the root directory of this source tree).
80c16b537SWarner Losh * You may select, at your option, one of the above-listed licenses.
90c16b537SWarner Losh */
100c16b537SWarner Losh
119cbefe25SConrad Meyer #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
120c16b537SWarner Losh #include "zstd_fast.h"
130c16b537SWarner Losh
140c16b537SWarner Losh
ZSTD_fillHashTable(ZSTD_matchState_t * ms,const void * const end,ZSTD_dictTableLoadMethod_e dtlm)1519fcbaf1SConrad Meyer void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
164d3f1eafSConrad Meyer const void* const end,
174d3f1eafSConrad Meyer ZSTD_dictTableLoadMethod_e dtlm)
180c16b537SWarner Losh {
190f743729SConrad Meyer const ZSTD_compressionParameters* const cParams = &ms->cParams;
2019fcbaf1SConrad Meyer U32* const hashTable = ms->hashTable;
2119fcbaf1SConrad Meyer U32 const hBits = cParams->hashLog;
22a0483764SConrad Meyer U32 const mls = cParams->minMatch;
2319fcbaf1SConrad Meyer const BYTE* const base = ms->window.base;
2419fcbaf1SConrad Meyer const BYTE* ip = base + ms->nextToUpdate;
250c16b537SWarner Losh const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
2619fcbaf1SConrad Meyer const U32 fastHashFillStep = 3;
270c16b537SWarner Losh
2819fcbaf1SConrad Meyer /* Always insert every fastHashFillStep position into the hash table.
2919fcbaf1SConrad Meyer * Insert the other positions if their hash entry is empty.
3019fcbaf1SConrad Meyer */
31a0483764SConrad Meyer for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32f7cd7fe5SConrad Meyer U32 const curr = (U32)(ip - base);
33a0483764SConrad Meyer size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34f7cd7fe5SConrad Meyer hashTable[hash0] = curr;
35a0483764SConrad Meyer if (dtlm == ZSTD_dtlm_fast) continue;
360f743729SConrad Meyer /* Only load extra positions for ZSTD_dtlm_full */
37a0483764SConrad Meyer { U32 p;
38a0483764SConrad Meyer for (p = 1; p < fastHashFillStep; ++p) {
39a0483764SConrad Meyer size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40a0483764SConrad Meyer if (hashTable[hash] == 0) { /* not yet filled */
41f7cd7fe5SConrad Meyer hashTable[hash] = curr + p;
42a0483764SConrad Meyer } } } }
4319fcbaf1SConrad Meyer }
440c16b537SWarner Losh
454d3f1eafSConrad Meyer
46*5ff13fbcSAllan Jude /**
47*5ff13fbcSAllan Jude * If you squint hard enough (and ignore repcodes), the search operation at any
48*5ff13fbcSAllan Jude * given position is broken into 4 stages:
49*5ff13fbcSAllan Jude *
50*5ff13fbcSAllan Jude * 1. Hash (map position to hash value via input read)
51*5ff13fbcSAllan Jude * 2. Lookup (map hash val to index via hashtable read)
52*5ff13fbcSAllan Jude * 3. Load (map index to value at that position via input read)
53*5ff13fbcSAllan Jude * 4. Compare
54*5ff13fbcSAllan Jude *
55*5ff13fbcSAllan Jude * Each of these steps involves a memory read at an address which is computed
56*5ff13fbcSAllan Jude * from the previous step. This means these steps must be sequenced and their
57*5ff13fbcSAllan Jude * latencies are cumulative.
58*5ff13fbcSAllan Jude *
59*5ff13fbcSAllan Jude * Rather than do 1->2->3->4 sequentially for a single position before moving
60*5ff13fbcSAllan Jude * onto the next, this implementation interleaves these operations across the
61*5ff13fbcSAllan Jude * next few positions:
62*5ff13fbcSAllan Jude *
63*5ff13fbcSAllan Jude * R = Repcode Read & Compare
64*5ff13fbcSAllan Jude * H = Hash
65*5ff13fbcSAllan Jude * T = Table Lookup
66*5ff13fbcSAllan Jude * M = Match Read & Compare
67*5ff13fbcSAllan Jude *
68*5ff13fbcSAllan Jude * Pos | Time -->
69*5ff13fbcSAllan Jude * ----+-------------------
70*5ff13fbcSAllan Jude * N | ... M
71*5ff13fbcSAllan Jude * N+1 | ... TM
72*5ff13fbcSAllan Jude * N+2 | R H T M
73*5ff13fbcSAllan Jude * N+3 | H TM
74*5ff13fbcSAllan Jude * N+4 | R H T M
75*5ff13fbcSAllan Jude * N+5 | H ...
76*5ff13fbcSAllan Jude * N+6 | R ...
77*5ff13fbcSAllan Jude *
78*5ff13fbcSAllan Jude * This is very much analogous to the pipelining of execution in a CPU. And just
79*5ff13fbcSAllan Jude * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
80*5ff13fbcSAllan Jude * branch).
81*5ff13fbcSAllan Jude *
82*5ff13fbcSAllan Jude * When this happens, we throw away our current state, and do the following prep
83*5ff13fbcSAllan Jude * to re-enter the loop:
84*5ff13fbcSAllan Jude *
85*5ff13fbcSAllan Jude * Pos | Time -->
86*5ff13fbcSAllan Jude * ----+-------------------
87*5ff13fbcSAllan Jude * N | H T
88*5ff13fbcSAllan Jude * N+1 | H
89*5ff13fbcSAllan Jude *
90*5ff13fbcSAllan Jude * This is also the work we do at the beginning to enter the loop initially.
91*5ff13fbcSAllan Jude */
929cbefe25SConrad Meyer FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_noDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)93*5ff13fbcSAllan Jude ZSTD_compressBlock_fast_noDict_generic(
9419fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
9519fcbaf1SConrad Meyer void const* src, size_t srcSize,
96*5ff13fbcSAllan Jude U32 const mls, U32 const hasStep)
972b9c00cbSConrad Meyer {
982b9c00cbSConrad Meyer const ZSTD_compressionParameters* const cParams = &ms->cParams;
992b9c00cbSConrad Meyer U32* const hashTable = ms->hashTable;
1002b9c00cbSConrad Meyer U32 const hlog = cParams->hashLog;
1012b9c00cbSConrad Meyer /* support stepSize of 0 */
102*5ff13fbcSAllan Jude size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
1032b9c00cbSConrad Meyer const BYTE* const base = ms->window.base;
1042b9c00cbSConrad Meyer const BYTE* const istart = (const BYTE*)src;
1054d3f1eafSConrad Meyer const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
10637f1f268SConrad Meyer const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
1072b9c00cbSConrad Meyer const BYTE* const prefixStart = base + prefixStartIndex;
1082b9c00cbSConrad Meyer const BYTE* const iend = istart + srcSize;
1092b9c00cbSConrad Meyer const BYTE* const ilimit = iend - HASH_READ_SIZE;
110*5ff13fbcSAllan Jude
111*5ff13fbcSAllan Jude const BYTE* anchor = istart;
112*5ff13fbcSAllan Jude const BYTE* ip0 = istart;
113*5ff13fbcSAllan Jude const BYTE* ip1;
114*5ff13fbcSAllan Jude const BYTE* ip2;
115*5ff13fbcSAllan Jude const BYTE* ip3;
116*5ff13fbcSAllan Jude U32 current0;
117*5ff13fbcSAllan Jude
118*5ff13fbcSAllan Jude U32 rep_offset1 = rep[0];
119*5ff13fbcSAllan Jude U32 rep_offset2 = rep[1];
1202b9c00cbSConrad Meyer U32 offsetSaved = 0;
1212b9c00cbSConrad Meyer
122*5ff13fbcSAllan Jude size_t hash0; /* hash for ip0 */
123*5ff13fbcSAllan Jude size_t hash1; /* hash for ip1 */
124*5ff13fbcSAllan Jude U32 idx; /* match idx for ip0 */
125*5ff13fbcSAllan Jude U32 mval; /* src value at match idx */
126*5ff13fbcSAllan Jude
127*5ff13fbcSAllan Jude U32 offcode;
128*5ff13fbcSAllan Jude const BYTE* match0;
129*5ff13fbcSAllan Jude size_t mLength;
130*5ff13fbcSAllan Jude
131*5ff13fbcSAllan Jude /* ip0 and ip1 are always adjacent. The targetLength skipping and
132*5ff13fbcSAllan Jude * uncompressibility acceleration is applied to every other position,
133*5ff13fbcSAllan Jude * matching the behavior of #1562. step therefore represents the gap
134*5ff13fbcSAllan Jude * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
135*5ff13fbcSAllan Jude size_t step;
136*5ff13fbcSAllan Jude const BYTE* nextStep;
137*5ff13fbcSAllan Jude const size_t kStepIncr = (1 << (kSearchStrength - 1));
138*5ff13fbcSAllan Jude
1399cbefe25SConrad Meyer DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
1402b9c00cbSConrad Meyer ip0 += (ip0 == prefixStart);
141f7cd7fe5SConrad Meyer { U32 const curr = (U32)(ip0 - base);
142f7cd7fe5SConrad Meyer U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
143f7cd7fe5SConrad Meyer U32 const maxRep = curr - windowLow;
144*5ff13fbcSAllan Jude if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
145*5ff13fbcSAllan Jude if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
1462b9c00cbSConrad Meyer }
1472b9c00cbSConrad Meyer
148*5ff13fbcSAllan Jude /* start each op */
149*5ff13fbcSAllan Jude _start: /* Requires: ip0 */
15037f1f268SConrad Meyer
151*5ff13fbcSAllan Jude step = stepSize;
152*5ff13fbcSAllan Jude nextStep = ip0 + kStepIncr;
15337f1f268SConrad Meyer
154*5ff13fbcSAllan Jude /* calculate positions, ip0 - anchor == 0, so we skip step calc */
155*5ff13fbcSAllan Jude ip1 = ip0 + 1;
156*5ff13fbcSAllan Jude ip2 = ip0 + step;
157*5ff13fbcSAllan Jude ip3 = ip2 + 1;
1582b9c00cbSConrad Meyer
159*5ff13fbcSAllan Jude if (ip3 >= ilimit) {
160*5ff13fbcSAllan Jude goto _cleanup;
161*5ff13fbcSAllan Jude }
1622b9c00cbSConrad Meyer
163*5ff13fbcSAllan Jude hash0 = ZSTD_hashPtr(ip0, hlog, mls);
164*5ff13fbcSAllan Jude hash1 = ZSTD_hashPtr(ip1, hlog, mls);
165*5ff13fbcSAllan Jude
166*5ff13fbcSAllan Jude idx = hashTable[hash0];
167*5ff13fbcSAllan Jude
168*5ff13fbcSAllan Jude do {
169*5ff13fbcSAllan Jude /* load repcode match for ip[2]*/
170*5ff13fbcSAllan Jude const U32 rval = MEM_read32(ip2 - rep_offset1);
171*5ff13fbcSAllan Jude
172*5ff13fbcSAllan Jude /* write back hash table entry */
173*5ff13fbcSAllan Jude current0 = (U32)(ip0 - base);
174*5ff13fbcSAllan Jude hashTable[hash0] = current0;
175*5ff13fbcSAllan Jude
176*5ff13fbcSAllan Jude /* check repcode at ip[2] */
177*5ff13fbcSAllan Jude if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
178*5ff13fbcSAllan Jude ip0 = ip2;
179*5ff13fbcSAllan Jude match0 = ip0 - rep_offset1;
180*5ff13fbcSAllan Jude mLength = ip0[-1] == match0[-1];
181*5ff13fbcSAllan Jude ip0 -= mLength;
182*5ff13fbcSAllan Jude match0 -= mLength;
183*5ff13fbcSAllan Jude offcode = STORE_REPCODE_1;
18437f1f268SConrad Meyer mLength += 4;
1852b9c00cbSConrad Meyer goto _match;
1862b9c00cbSConrad Meyer }
187*5ff13fbcSAllan Jude
188*5ff13fbcSAllan Jude /* load match for ip[0] */
189*5ff13fbcSAllan Jude if (idx >= prefixStartIndex) {
190*5ff13fbcSAllan Jude mval = MEM_read32(base + idx);
191*5ff13fbcSAllan Jude } else {
192*5ff13fbcSAllan Jude mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
193*5ff13fbcSAllan Jude }
194*5ff13fbcSAllan Jude
195*5ff13fbcSAllan Jude /* check match at ip[0] */
196*5ff13fbcSAllan Jude if (MEM_read32(ip0) == mval) {
197*5ff13fbcSAllan Jude /* found a match! */
1982b9c00cbSConrad Meyer goto _offset;
1992b9c00cbSConrad Meyer }
200*5ff13fbcSAllan Jude
201*5ff13fbcSAllan Jude /* lookup ip[1] */
202*5ff13fbcSAllan Jude idx = hashTable[hash1];
203*5ff13fbcSAllan Jude
204*5ff13fbcSAllan Jude /* hash ip[2] */
205*5ff13fbcSAllan Jude hash0 = hash1;
206*5ff13fbcSAllan Jude hash1 = ZSTD_hashPtr(ip2, hlog, mls);
207*5ff13fbcSAllan Jude
208*5ff13fbcSAllan Jude /* advance to next positions */
2092b9c00cbSConrad Meyer ip0 = ip1;
210*5ff13fbcSAllan Jude ip1 = ip2;
211*5ff13fbcSAllan Jude ip2 = ip3;
212*5ff13fbcSAllan Jude
213*5ff13fbcSAllan Jude /* write back hash table entry */
214*5ff13fbcSAllan Jude current0 = (U32)(ip0 - base);
215*5ff13fbcSAllan Jude hashTable[hash0] = current0;
216*5ff13fbcSAllan Jude
217*5ff13fbcSAllan Jude /* load match for ip[0] */
218*5ff13fbcSAllan Jude if (idx >= prefixStartIndex) {
219*5ff13fbcSAllan Jude mval = MEM_read32(base + idx);
220*5ff13fbcSAllan Jude } else {
221*5ff13fbcSAllan Jude mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
222*5ff13fbcSAllan Jude }
223*5ff13fbcSAllan Jude
224*5ff13fbcSAllan Jude /* check match at ip[0] */
225*5ff13fbcSAllan Jude if (MEM_read32(ip0) == mval) {
226*5ff13fbcSAllan Jude /* found a match! */
2272b9c00cbSConrad Meyer goto _offset;
2282b9c00cbSConrad Meyer }
229*5ff13fbcSAllan Jude
230*5ff13fbcSAllan Jude /* lookup ip[1] */
231*5ff13fbcSAllan Jude idx = hashTable[hash1];
232*5ff13fbcSAllan Jude
233*5ff13fbcSAllan Jude /* hash ip[2] */
234*5ff13fbcSAllan Jude hash0 = hash1;
235*5ff13fbcSAllan Jude hash1 = ZSTD_hashPtr(ip2, hlog, mls);
236*5ff13fbcSAllan Jude
237*5ff13fbcSAllan Jude /* advance to next positions */
238*5ff13fbcSAllan Jude ip0 = ip1;
239*5ff13fbcSAllan Jude ip1 = ip2;
240*5ff13fbcSAllan Jude ip2 = ip0 + step;
241*5ff13fbcSAllan Jude ip3 = ip1 + step;
242*5ff13fbcSAllan Jude
243*5ff13fbcSAllan Jude /* calculate step */
244*5ff13fbcSAllan Jude if (ip2 >= nextStep) {
245*5ff13fbcSAllan Jude step++;
246*5ff13fbcSAllan Jude PREFETCH_L1(ip1 + 64);
247*5ff13fbcSAllan Jude PREFETCH_L1(ip1 + 128);
248*5ff13fbcSAllan Jude nextStep += kStepIncr;
2492b9c00cbSConrad Meyer }
250*5ff13fbcSAllan Jude } while (ip3 < ilimit);
251*5ff13fbcSAllan Jude
252*5ff13fbcSAllan Jude _cleanup:
253*5ff13fbcSAllan Jude /* Note that there are probably still a couple positions we could search.
254*5ff13fbcSAllan Jude * However, it seems to be a meaningful performance hit to try to search
255*5ff13fbcSAllan Jude * them. So let's not. */
256*5ff13fbcSAllan Jude
257*5ff13fbcSAllan Jude /* save reps for next block */
258*5ff13fbcSAllan Jude rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
259*5ff13fbcSAllan Jude rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
260*5ff13fbcSAllan Jude
261*5ff13fbcSAllan Jude /* Return the last literals size */
262*5ff13fbcSAllan Jude return (size_t)(iend - anchor);
263*5ff13fbcSAllan Jude
264*5ff13fbcSAllan Jude _offset: /* Requires: ip0, idx */
265*5ff13fbcSAllan Jude
266*5ff13fbcSAllan Jude /* Compute the offset code. */
267*5ff13fbcSAllan Jude match0 = base + idx;
268*5ff13fbcSAllan Jude rep_offset2 = rep_offset1;
269*5ff13fbcSAllan Jude rep_offset1 = (U32)(ip0-match0);
270*5ff13fbcSAllan Jude offcode = STORE_OFFSET(rep_offset1);
27137f1f268SConrad Meyer mLength = 4;
272*5ff13fbcSAllan Jude
273*5ff13fbcSAllan Jude /* Count the backwards match length. */
274*5ff13fbcSAllan Jude while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
275*5ff13fbcSAllan Jude ip0--;
276*5ff13fbcSAllan Jude match0--;
277*5ff13fbcSAllan Jude mLength++;
278*5ff13fbcSAllan Jude }
2792b9c00cbSConrad Meyer
2802b9c00cbSConrad Meyer _match: /* Requires: ip0, match0, offcode */
281*5ff13fbcSAllan Jude
282*5ff13fbcSAllan Jude /* Count the forward length. */
28337f1f268SConrad Meyer mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
284*5ff13fbcSAllan Jude
285*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
286*5ff13fbcSAllan Jude
2872b9c00cbSConrad Meyer ip0 += mLength;
2882b9c00cbSConrad Meyer anchor = ip0;
2892b9c00cbSConrad Meyer
290*5ff13fbcSAllan Jude /* write next hash table entry */
291*5ff13fbcSAllan Jude if (ip1 < ip0) {
292*5ff13fbcSAllan Jude hashTable[hash1] = (U32)(ip1 - base);
293*5ff13fbcSAllan Jude }
294*5ff13fbcSAllan Jude
295*5ff13fbcSAllan Jude /* Fill table and check for immediate repcode. */
2962b9c00cbSConrad Meyer if (ip0 <= ilimit) {
2972b9c00cbSConrad Meyer /* Fill Table */
2982b9c00cbSConrad Meyer assert(base+current0+2 > istart); /* check base overflow */
2992b9c00cbSConrad Meyer hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
3002b9c00cbSConrad Meyer hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
3012b9c00cbSConrad Meyer
302*5ff13fbcSAllan Jude if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
303*5ff13fbcSAllan Jude while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
3042b9c00cbSConrad Meyer /* store sequence */
305*5ff13fbcSAllan Jude size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
306*5ff13fbcSAllan Jude { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
3072b9c00cbSConrad Meyer hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
3082b9c00cbSConrad Meyer ip0 += rLength;
309*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
3102b9c00cbSConrad Meyer anchor = ip0;
3112b9c00cbSConrad Meyer continue; /* faster when present (confirmed on gcc-8) ... (?) */
31237f1f268SConrad Meyer } } }
313*5ff13fbcSAllan Jude
314*5ff13fbcSAllan Jude goto _start;
3152b9c00cbSConrad Meyer }
3162b9c00cbSConrad Meyer
317*5ff13fbcSAllan Jude #define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
318*5ff13fbcSAllan Jude static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
319*5ff13fbcSAllan Jude ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
320*5ff13fbcSAllan Jude void const* src, size_t srcSize) \
321*5ff13fbcSAllan Jude { \
322*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
3232b9c00cbSConrad Meyer }
3242b9c00cbSConrad Meyer
325*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 4, 1)
326*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 5, 1)
327*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 6, 1)
328*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 7, 1)
329*5ff13fbcSAllan Jude
330*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 4, 0)
331*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 5, 0)
332*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 6, 0)
333*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 7, 0)
3342b9c00cbSConrad Meyer
ZSTD_compressBlock_fast(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)3352b9c00cbSConrad Meyer size_t ZSTD_compressBlock_fast(
3362b9c00cbSConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
3372b9c00cbSConrad Meyer void const* src, size_t srcSize)
3382b9c00cbSConrad Meyer {
3399cbefe25SConrad Meyer U32 const mls = ms->cParams.minMatch;
3402b9c00cbSConrad Meyer assert(ms->dictMatchState == NULL);
341*5ff13fbcSAllan Jude if (ms->cParams.targetLength > 1) {
3422b9c00cbSConrad Meyer switch(mls)
3432b9c00cbSConrad Meyer {
3442b9c00cbSConrad Meyer default: /* includes case 3 */
3452b9c00cbSConrad Meyer case 4 :
346*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
3472b9c00cbSConrad Meyer case 5 :
348*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
3492b9c00cbSConrad Meyer case 6 :
350*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
3512b9c00cbSConrad Meyer case 7 :
352*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
353*5ff13fbcSAllan Jude }
354*5ff13fbcSAllan Jude } else {
355*5ff13fbcSAllan Jude switch(mls)
356*5ff13fbcSAllan Jude {
357*5ff13fbcSAllan Jude default: /* includes case 3 */
358*5ff13fbcSAllan Jude case 4 :
359*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
360*5ff13fbcSAllan Jude case 5 :
361*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
362*5ff13fbcSAllan Jude case 6 :
363*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
364*5ff13fbcSAllan Jude case 7 :
365*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
366*5ff13fbcSAllan Jude }
367*5ff13fbcSAllan Jude
3682b9c00cbSConrad Meyer }
3692b9c00cbSConrad Meyer }
3702b9c00cbSConrad Meyer
3712b9c00cbSConrad Meyer FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_fast_dictMatchState_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)3722b9c00cbSConrad Meyer size_t ZSTD_compressBlock_fast_dictMatchState_generic(
3732b9c00cbSConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
374*5ff13fbcSAllan Jude void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
3750c16b537SWarner Losh {
3760f743729SConrad Meyer const ZSTD_compressionParameters* const cParams = &ms->cParams;
37719fcbaf1SConrad Meyer U32* const hashTable = ms->hashTable;
3780f743729SConrad Meyer U32 const hlog = cParams->hashLog;
3790f743729SConrad Meyer /* support stepSize of 0 */
3800f743729SConrad Meyer U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
38119fcbaf1SConrad Meyer const BYTE* const base = ms->window.base;
3820c16b537SWarner Losh const BYTE* const istart = (const BYTE*)src;
3830c16b537SWarner Losh const BYTE* ip = istart;
3840c16b537SWarner Losh const BYTE* anchor = istart;
3850f743729SConrad Meyer const U32 prefixStartIndex = ms->window.dictLimit;
3860f743729SConrad Meyer const BYTE* const prefixStart = base + prefixStartIndex;
3870c16b537SWarner Losh const BYTE* const iend = istart + srcSize;
3880c16b537SWarner Losh const BYTE* const ilimit = iend - HASH_READ_SIZE;
38919fcbaf1SConrad Meyer U32 offset_1=rep[0], offset_2=rep[1];
3900c16b537SWarner Losh U32 offsetSaved = 0;
3910c16b537SWarner Losh
3920f743729SConrad Meyer const ZSTD_matchState_t* const dms = ms->dictMatchState;
3932b9c00cbSConrad Meyer const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
3942b9c00cbSConrad Meyer const U32* const dictHashTable = dms->hashTable;
3952b9c00cbSConrad Meyer const U32 dictStartIndex = dms->window.dictLimit;
3962b9c00cbSConrad Meyer const BYTE* const dictBase = dms->window.base;
3972b9c00cbSConrad Meyer const BYTE* const dictStart = dictBase + dictStartIndex;
3982b9c00cbSConrad Meyer const BYTE* const dictEnd = dms->window.nextSrc;
3992b9c00cbSConrad Meyer const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
4000f743729SConrad Meyer const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
4012b9c00cbSConrad Meyer const U32 dictHLog = dictCParams->hashLog;
4020f743729SConrad Meyer
4034d3f1eafSConrad Meyer /* if a dictionary is still attached, it necessarily means that
4044d3f1eafSConrad Meyer * it is within window size. So we just check it. */
4054d3f1eafSConrad Meyer const U32 maxDistance = 1U << cParams->windowLog;
4064d3f1eafSConrad Meyer const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
4074d3f1eafSConrad Meyer assert(endIndex - prefixStartIndex <= maxDistance);
4084d3f1eafSConrad Meyer (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
4094d3f1eafSConrad Meyer
410*5ff13fbcSAllan Jude (void)hasStep; /* not currently specialized on whether it's accelerated */
411*5ff13fbcSAllan Jude
412*5ff13fbcSAllan Jude /* ensure there will be no underflow
4134d3f1eafSConrad Meyer * when translating a dict index into a local index */
4142b9c00cbSConrad Meyer assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
4150f743729SConrad Meyer
4160c16b537SWarner Losh /* init */
4179cbefe25SConrad Meyer DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
4180f743729SConrad Meyer ip += (dictAndPrefixLength == 0);
4190f743729SConrad Meyer /* dictMatchState repCode checks don't currently handle repCode == 0
4200f743729SConrad Meyer * disabling. */
4210f743729SConrad Meyer assert(offset_1 <= dictAndPrefixLength);
4220f743729SConrad Meyer assert(offset_2 <= dictAndPrefixLength);
4230c16b537SWarner Losh
4240c16b537SWarner Losh /* Main Search Loop */
4250c16b537SWarner Losh while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
4260c16b537SWarner Losh size_t mLength;
42719fcbaf1SConrad Meyer size_t const h = ZSTD_hashPtr(ip, hlog, mls);
428f7cd7fe5SConrad Meyer U32 const curr = (U32)(ip-base);
4290c16b537SWarner Losh U32 const matchIndex = hashTable[h];
4300c16b537SWarner Losh const BYTE* match = base + matchIndex;
431f7cd7fe5SConrad Meyer const U32 repIndex = curr + 1 - offset_1;
4322b9c00cbSConrad Meyer const BYTE* repMatch = (repIndex < prefixStartIndex) ?
4330f743729SConrad Meyer dictBase + (repIndex - dictIndexDelta) :
4340f743729SConrad Meyer base + repIndex;
435f7cd7fe5SConrad Meyer hashTable[h] = curr; /* update hash table */
4360c16b537SWarner Losh
4372b9c00cbSConrad Meyer if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
4380f743729SConrad Meyer && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
4390f743729SConrad Meyer const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
4400f743729SConrad Meyer mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
4410f743729SConrad Meyer ip++;
442*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
4430f743729SConrad Meyer } else if ( (matchIndex <= prefixStartIndex) ) {
4440f743729SConrad Meyer size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
4450f743729SConrad Meyer U32 const dictMatchIndex = dictHashTable[dictHash];
4460f743729SConrad Meyer const BYTE* dictMatch = dictBase + dictMatchIndex;
4470f743729SConrad Meyer if (dictMatchIndex <= dictStartIndex ||
4480f743729SConrad Meyer MEM_read32(dictMatch) != MEM_read32(ip)) {
4490f743729SConrad Meyer assert(stepSize >= 1);
4500f743729SConrad Meyer ip += ((ip-anchor) >> kSearchStrength) + stepSize;
4510f743729SConrad Meyer continue;
4520c16b537SWarner Losh } else {
4530f743729SConrad Meyer /* found a dict match */
454f7cd7fe5SConrad Meyer U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
4550f743729SConrad Meyer mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
4560f743729SConrad Meyer while (((ip>anchor) & (dictMatch>dictStart))
4570f743729SConrad Meyer && (ip[-1] == dictMatch[-1])) {
4580f743729SConrad Meyer ip--; dictMatch--; mLength++;
4590f743729SConrad Meyer } /* catch up */
4600f743729SConrad Meyer offset_2 = offset_1;
4610f743729SConrad Meyer offset_1 = offset;
462*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
4630f743729SConrad Meyer }
4640f743729SConrad Meyer } else if (MEM_read32(match) != MEM_read32(ip)) {
4650f743729SConrad Meyer /* it's not a match, and we're not going to check the dictionary */
4660f743729SConrad Meyer assert(stepSize >= 1);
4670f743729SConrad Meyer ip += ((ip-anchor) >> kSearchStrength) + stepSize;
4680f743729SConrad Meyer continue;
4690f743729SConrad Meyer } else {
4700f743729SConrad Meyer /* found a regular match */
4710f743729SConrad Meyer U32 const offset = (U32)(ip-match);
4720c16b537SWarner Losh mLength = ZSTD_count(ip+4, match+4, iend) + 4;
4730f743729SConrad Meyer while (((ip>anchor) & (match>prefixStart))
4740f743729SConrad Meyer && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
4750c16b537SWarner Losh offset_2 = offset_1;
4760c16b537SWarner Losh offset_1 = offset;
477*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
4780f743729SConrad Meyer }
4790c16b537SWarner Losh
4800c16b537SWarner Losh /* match found */
4810c16b537SWarner Losh ip += mLength;
4820c16b537SWarner Losh anchor = ip;
4830c16b537SWarner Losh
4840c16b537SWarner Losh if (ip <= ilimit) {
4850c16b537SWarner Losh /* Fill Table */
486f7cd7fe5SConrad Meyer assert(base+curr+2 > istart); /* check base overflow */
487f7cd7fe5SConrad Meyer hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
48819fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
4890f743729SConrad Meyer
4900c16b537SWarner Losh /* check immediate repcode */
4910f743729SConrad Meyer while (ip <= ilimit) {
4920f743729SConrad Meyer U32 const current2 = (U32)(ip-base);
4930f743729SConrad Meyer U32 const repIndex2 = current2 - offset_2;
4940f743729SConrad Meyer const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
4950f743729SConrad Meyer dictBase - dictIndexDelta + repIndex2 :
4960f743729SConrad Meyer base + repIndex2;
4970f743729SConrad Meyer if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
4980f743729SConrad Meyer && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
4990f743729SConrad Meyer const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
5000f743729SConrad Meyer size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
5010f743729SConrad Meyer U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
502*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
5030f743729SConrad Meyer hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
5040f743729SConrad Meyer ip += repLength2;
5050f743729SConrad Meyer anchor = ip;
5060f743729SConrad Meyer continue;
5070f743729SConrad Meyer }
5080f743729SConrad Meyer break;
5090f743729SConrad Meyer }
5100f743729SConrad Meyer }
5112b9c00cbSConrad Meyer }
5120c16b537SWarner Losh
5130c16b537SWarner Losh /* save reps for next block */
51419fcbaf1SConrad Meyer rep[0] = offset_1 ? offset_1 : offsetSaved;
51519fcbaf1SConrad Meyer rep[1] = offset_2 ? offset_2 : offsetSaved;
5160c16b537SWarner Losh
5170c16b537SWarner Losh /* Return the last literals size */
5184d3f1eafSConrad Meyer return (size_t)(iend - anchor);
5190c16b537SWarner Losh }
5200c16b537SWarner Losh
521*5ff13fbcSAllan Jude
522*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
523*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
524*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
525*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
526*5ff13fbcSAllan Jude
ZSTD_compressBlock_fast_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)5270f743729SConrad Meyer size_t ZSTD_compressBlock_fast_dictMatchState(
5280f743729SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
5290f743729SConrad Meyer void const* src, size_t srcSize)
5300f743729SConrad Meyer {
5319cbefe25SConrad Meyer U32 const mls = ms->cParams.minMatch;
5320f743729SConrad Meyer assert(ms->dictMatchState != NULL);
5330f743729SConrad Meyer switch(mls)
5340f743729SConrad Meyer {
5350f743729SConrad Meyer default: /* includes case 3 */
5360f743729SConrad Meyer case 4 :
537*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
5380f743729SConrad Meyer case 5 :
539*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
5400f743729SConrad Meyer case 6 :
541*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
5420f743729SConrad Meyer case 7 :
543*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
5440c16b537SWarner Losh }
5450c16b537SWarner Losh }
5460c16b537SWarner Losh
5470c16b537SWarner Losh
ZSTD_compressBlock_fast_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)54819fcbaf1SConrad Meyer static size_t ZSTD_compressBlock_fast_extDict_generic(
54919fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
550*5ff13fbcSAllan Jude void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
5510c16b537SWarner Losh {
5520f743729SConrad Meyer const ZSTD_compressionParameters* const cParams = &ms->cParams;
5530f743729SConrad Meyer U32* const hashTable = ms->hashTable;
5540f743729SConrad Meyer U32 const hlog = cParams->hashLog;
5550f743729SConrad Meyer /* support stepSize of 0 */
5560f743729SConrad Meyer U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
55719fcbaf1SConrad Meyer const BYTE* const base = ms->window.base;
55819fcbaf1SConrad Meyer const BYTE* const dictBase = ms->window.dictBase;
5590c16b537SWarner Losh const BYTE* const istart = (const BYTE*)src;
5600c16b537SWarner Losh const BYTE* ip = istart;
5610c16b537SWarner Losh const BYTE* anchor = istart;
5624d3f1eafSConrad Meyer const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
5639cbefe25SConrad Meyer const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
5644d3f1eafSConrad Meyer const U32 dictStartIndex = lowLimit;
5650f743729SConrad Meyer const BYTE* const dictStart = dictBase + dictStartIndex;
5664d3f1eafSConrad Meyer const U32 dictLimit = ms->window.dictLimit;
5674d3f1eafSConrad Meyer const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
5680f743729SConrad Meyer const BYTE* const prefixStart = base + prefixStartIndex;
5690f743729SConrad Meyer const BYTE* const dictEnd = dictBase + prefixStartIndex;
5700c16b537SWarner Losh const BYTE* const iend = istart + srcSize;
5710c16b537SWarner Losh const BYTE* const ilimit = iend - 8;
57219fcbaf1SConrad Meyer U32 offset_1=rep[0], offset_2=rep[1];
5730c16b537SWarner Losh
574*5ff13fbcSAllan Jude (void)hasStep; /* not currently specialized on whether it's accelerated */
575*5ff13fbcSAllan Jude
57637f1f268SConrad Meyer DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
5779cbefe25SConrad Meyer
5784d3f1eafSConrad Meyer /* switch to "regular" variant if extDict is invalidated due to maxDistance */
5794d3f1eafSConrad Meyer if (prefixStartIndex == dictStartIndex)
580*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
5814d3f1eafSConrad Meyer
5820c16b537SWarner Losh /* Search Loop */
5830c16b537SWarner Losh while (ip < ilimit) { /* < instead of <=, because (ip+1) */
58419fcbaf1SConrad Meyer const size_t h = ZSTD_hashPtr(ip, hlog, mls);
5850c16b537SWarner Losh const U32 matchIndex = hashTable[h];
5860f743729SConrad Meyer const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
5870c16b537SWarner Losh const BYTE* match = matchBase + matchIndex;
588f7cd7fe5SConrad Meyer const U32 curr = (U32)(ip-base);
589f7cd7fe5SConrad Meyer const U32 repIndex = curr + 1 - offset_1;
5900f743729SConrad Meyer const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
5910f743729SConrad Meyer const BYTE* const repMatch = repBase + repIndex;
592f7cd7fe5SConrad Meyer hashTable[h] = curr; /* update hash table */
593f7cd7fe5SConrad Meyer DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
5940c16b537SWarner Losh
595*5ff13fbcSAllan Jude if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
596*5ff13fbcSAllan Jude & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
5970c16b537SWarner Losh && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
5989cbefe25SConrad Meyer const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
5999cbefe25SConrad Meyer size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
6000c16b537SWarner Losh ip++;
601*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
6029cbefe25SConrad Meyer ip += rLength;
6039cbefe25SConrad Meyer anchor = ip;
6040c16b537SWarner Losh } else {
6050f743729SConrad Meyer if ( (matchIndex < dictStartIndex) ||
6060c16b537SWarner Losh (MEM_read32(match) != MEM_read32(ip)) ) {
60719fcbaf1SConrad Meyer assert(stepSize >= 1);
60819fcbaf1SConrad Meyer ip += ((ip-anchor) >> kSearchStrength) + stepSize;
6090c16b537SWarner Losh continue;
6100c16b537SWarner Losh }
6119cbefe25SConrad Meyer { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
6129cbefe25SConrad Meyer const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
613f7cd7fe5SConrad Meyer U32 const offset = curr - matchIndex;
6149cbefe25SConrad Meyer size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
6150c16b537SWarner Losh while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
6169cbefe25SConrad Meyer offset_2 = offset_1; offset_1 = offset; /* update offset history */
617*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
6180c16b537SWarner Losh ip += mLength;
6190c16b537SWarner Losh anchor = ip;
6209cbefe25SConrad Meyer } }
6210c16b537SWarner Losh
6220c16b537SWarner Losh if (ip <= ilimit) {
6230c16b537SWarner Losh /* Fill Table */
624f7cd7fe5SConrad Meyer hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
62519fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
6260c16b537SWarner Losh /* check immediate repcode */
6270c16b537SWarner Losh while (ip <= ilimit) {
6280c16b537SWarner Losh U32 const current2 = (U32)(ip-base);
6290c16b537SWarner Losh U32 const repIndex2 = current2 - offset_2;
6309cbefe25SConrad Meyer const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
631*5ff13fbcSAllan Jude if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
6320c16b537SWarner Losh && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
6330f743729SConrad Meyer const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
6340f743729SConrad Meyer size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
6359cbefe25SConrad Meyer { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
636*5ff13fbcSAllan Jude ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
63719fcbaf1SConrad Meyer hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
6380c16b537SWarner Losh ip += repLength2;
6390c16b537SWarner Losh anchor = ip;
6400c16b537SWarner Losh continue;
6410c16b537SWarner Losh }
6420c16b537SWarner Losh break;
6430c16b537SWarner Losh } } }
6440c16b537SWarner Losh
6450c16b537SWarner Losh /* save reps for next block */
64619fcbaf1SConrad Meyer rep[0] = offset_1;
64719fcbaf1SConrad Meyer rep[1] = offset_2;
6480c16b537SWarner Losh
6490c16b537SWarner Losh /* Return the last literals size */
6504d3f1eafSConrad Meyer return (size_t)(iend - anchor);
6510c16b537SWarner Losh }
6520c16b537SWarner Losh
653*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 4, 0)
654*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 5, 0)
655*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 6, 0)
656*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 7, 0)
6570c16b537SWarner Losh
ZSTD_compressBlock_fast_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)65819fcbaf1SConrad Meyer size_t ZSTD_compressBlock_fast_extDict(
65919fcbaf1SConrad Meyer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6600f743729SConrad Meyer void const* src, size_t srcSize)
6610c16b537SWarner Losh {
6629cbefe25SConrad Meyer U32 const mls = ms->cParams.minMatch;
6630c16b537SWarner Losh switch(mls)
6640c16b537SWarner Losh {
6650c16b537SWarner Losh default: /* includes case 3 */
6660c16b537SWarner Losh case 4 :
667*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
6680c16b537SWarner Losh case 5 :
669*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
6700c16b537SWarner Losh case 6 :
671*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
6720c16b537SWarner Losh case 7 :
673*5ff13fbcSAllan Jude return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
6740c16b537SWarner Losh }
6750c16b537SWarner Losh }
676