xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_fast.c (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
10c16b537SWarner Losh /*
2*5ff13fbcSAllan Jude  * Copyright (c) Yann Collet, Facebook, Inc.
30c16b537SWarner Losh  * All rights reserved.
40c16b537SWarner Losh  *
50c16b537SWarner Losh  * This source code is licensed under both the BSD-style license (found in the
60c16b537SWarner Losh  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
70c16b537SWarner Losh  * in the COPYING file in the root directory of this source tree).
80c16b537SWarner Losh  * You may select, at your option, one of the above-listed licenses.
90c16b537SWarner Losh  */
100c16b537SWarner Losh 
119cbefe25SConrad Meyer #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
120c16b537SWarner Losh #include "zstd_fast.h"
130c16b537SWarner Losh 
140c16b537SWarner Losh 
ZSTD_fillHashTable(ZSTD_matchState_t * ms,const void * const end,ZSTD_dictTableLoadMethod_e dtlm)1519fcbaf1SConrad Meyer void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
164d3f1eafSConrad Meyer                         const void* const end,
174d3f1eafSConrad Meyer                         ZSTD_dictTableLoadMethod_e dtlm)
180c16b537SWarner Losh {
190f743729SConrad Meyer     const ZSTD_compressionParameters* const cParams = &ms->cParams;
2019fcbaf1SConrad Meyer     U32* const hashTable = ms->hashTable;
2119fcbaf1SConrad Meyer     U32  const hBits = cParams->hashLog;
22a0483764SConrad Meyer     U32  const mls = cParams->minMatch;
2319fcbaf1SConrad Meyer     const BYTE* const base = ms->window.base;
2419fcbaf1SConrad Meyer     const BYTE* ip = base + ms->nextToUpdate;
250c16b537SWarner Losh     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
2619fcbaf1SConrad Meyer     const U32 fastHashFillStep = 3;
270c16b537SWarner Losh 
2819fcbaf1SConrad Meyer     /* Always insert every fastHashFillStep position into the hash table.
2919fcbaf1SConrad Meyer      * Insert the other positions if their hash entry is empty.
3019fcbaf1SConrad Meyer      */
31a0483764SConrad Meyer     for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32f7cd7fe5SConrad Meyer         U32 const curr = (U32)(ip - base);
33a0483764SConrad Meyer         size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34f7cd7fe5SConrad Meyer         hashTable[hash0] = curr;
35a0483764SConrad Meyer         if (dtlm == ZSTD_dtlm_fast) continue;
360f743729SConrad Meyer         /* Only load extra positions for ZSTD_dtlm_full */
37a0483764SConrad Meyer         {   U32 p;
38a0483764SConrad Meyer             for (p = 1; p < fastHashFillStep; ++p) {
39a0483764SConrad Meyer                 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40a0483764SConrad Meyer                 if (hashTable[hash] == 0) {  /* not yet filled */
41f7cd7fe5SConrad Meyer                     hashTable[hash] = curr + p;
42a0483764SConrad Meyer     }   }   }   }
4319fcbaf1SConrad Meyer }
440c16b537SWarner Losh 
454d3f1eafSConrad Meyer 
46*5ff13fbcSAllan Jude /**
47*5ff13fbcSAllan Jude  * If you squint hard enough (and ignore repcodes), the search operation at any
48*5ff13fbcSAllan Jude  * given position is broken into 4 stages:
49*5ff13fbcSAllan Jude  *
50*5ff13fbcSAllan Jude  * 1. Hash   (map position to hash value via input read)
51*5ff13fbcSAllan Jude  * 2. Lookup (map hash val to index via hashtable read)
52*5ff13fbcSAllan Jude  * 3. Load   (map index to value at that position via input read)
53*5ff13fbcSAllan Jude  * 4. Compare
54*5ff13fbcSAllan Jude  *
55*5ff13fbcSAllan Jude  * Each of these steps involves a memory read at an address which is computed
56*5ff13fbcSAllan Jude  * from the previous step. This means these steps must be sequenced and their
57*5ff13fbcSAllan Jude  * latencies are cumulative.
58*5ff13fbcSAllan Jude  *
59*5ff13fbcSAllan Jude  * Rather than do 1->2->3->4 sequentially for a single position before moving
60*5ff13fbcSAllan Jude  * onto the next, this implementation interleaves these operations across the
61*5ff13fbcSAllan Jude  * next few positions:
62*5ff13fbcSAllan Jude  *
63*5ff13fbcSAllan Jude  * R = Repcode Read & Compare
64*5ff13fbcSAllan Jude  * H = Hash
65*5ff13fbcSAllan Jude  * T = Table Lookup
66*5ff13fbcSAllan Jude  * M = Match Read & Compare
67*5ff13fbcSAllan Jude  *
68*5ff13fbcSAllan Jude  * Pos | Time -->
69*5ff13fbcSAllan Jude  * ----+-------------------
70*5ff13fbcSAllan Jude  * N   | ... M
71*5ff13fbcSAllan Jude  * N+1 | ...   TM
72*5ff13fbcSAllan Jude  * N+2 |    R H   T M
73*5ff13fbcSAllan Jude  * N+3 |         H    TM
74*5ff13fbcSAllan Jude  * N+4 |           R H   T M
75*5ff13fbcSAllan Jude  * N+5 |                H   ...
76*5ff13fbcSAllan Jude  * N+6 |                  R ...
77*5ff13fbcSAllan Jude  *
78*5ff13fbcSAllan Jude  * This is very much analogous to the pipelining of execution in a CPU. And just
79*5ff13fbcSAllan Jude  * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
80*5ff13fbcSAllan Jude  * branch).
81*5ff13fbcSAllan Jude  *
82*5ff13fbcSAllan Jude  * When this happens, we throw away our current state, and do the following prep
83*5ff13fbcSAllan Jude  * to re-enter the loop:
84*5ff13fbcSAllan Jude  *
85*5ff13fbcSAllan Jude  * Pos | Time -->
86*5ff13fbcSAllan Jude  * ----+-------------------
87*5ff13fbcSAllan Jude  * N   | H T
88*5ff13fbcSAllan Jude  * N+1 |  H
89*5ff13fbcSAllan Jude  *
90*5ff13fbcSAllan Jude  * This is also the work we do at the beginning to enter the loop initially.
91*5ff13fbcSAllan Jude  */
929cbefe25SConrad Meyer FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_noDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)93*5ff13fbcSAllan Jude ZSTD_compressBlock_fast_noDict_generic(
9419fcbaf1SConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
9519fcbaf1SConrad Meyer         void const* src, size_t srcSize,
96*5ff13fbcSAllan Jude         U32 const mls, U32 const hasStep)
972b9c00cbSConrad Meyer {
982b9c00cbSConrad Meyer     const ZSTD_compressionParameters* const cParams = &ms->cParams;
992b9c00cbSConrad Meyer     U32* const hashTable = ms->hashTable;
1002b9c00cbSConrad Meyer     U32 const hlog = cParams->hashLog;
1012b9c00cbSConrad Meyer     /* support stepSize of 0 */
102*5ff13fbcSAllan Jude     size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
1032b9c00cbSConrad Meyer     const BYTE* const base = ms->window.base;
1042b9c00cbSConrad Meyer     const BYTE* const istart = (const BYTE*)src;
1054d3f1eafSConrad Meyer     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
10637f1f268SConrad Meyer     const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
1072b9c00cbSConrad Meyer     const BYTE* const prefixStart = base + prefixStartIndex;
1082b9c00cbSConrad Meyer     const BYTE* const iend = istart + srcSize;
1092b9c00cbSConrad Meyer     const BYTE* const ilimit = iend - HASH_READ_SIZE;
110*5ff13fbcSAllan Jude 
111*5ff13fbcSAllan Jude     const BYTE* anchor = istart;
112*5ff13fbcSAllan Jude     const BYTE* ip0 = istart;
113*5ff13fbcSAllan Jude     const BYTE* ip1;
114*5ff13fbcSAllan Jude     const BYTE* ip2;
115*5ff13fbcSAllan Jude     const BYTE* ip3;
116*5ff13fbcSAllan Jude     U32 current0;
117*5ff13fbcSAllan Jude 
118*5ff13fbcSAllan Jude     U32 rep_offset1 = rep[0];
119*5ff13fbcSAllan Jude     U32 rep_offset2 = rep[1];
1202b9c00cbSConrad Meyer     U32 offsetSaved = 0;
1212b9c00cbSConrad Meyer 
122*5ff13fbcSAllan Jude     size_t hash0; /* hash for ip0 */
123*5ff13fbcSAllan Jude     size_t hash1; /* hash for ip1 */
124*5ff13fbcSAllan Jude     U32 idx; /* match idx for ip0 */
125*5ff13fbcSAllan Jude     U32 mval; /* src value at match idx */
126*5ff13fbcSAllan Jude 
127*5ff13fbcSAllan Jude     U32 offcode;
128*5ff13fbcSAllan Jude     const BYTE* match0;
129*5ff13fbcSAllan Jude     size_t mLength;
130*5ff13fbcSAllan Jude 
131*5ff13fbcSAllan Jude     /* ip0 and ip1 are always adjacent. The targetLength skipping and
132*5ff13fbcSAllan Jude      * uncompressibility acceleration is applied to every other position,
133*5ff13fbcSAllan Jude      * matching the behavior of #1562. step therefore represents the gap
134*5ff13fbcSAllan Jude      * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
135*5ff13fbcSAllan Jude     size_t step;
136*5ff13fbcSAllan Jude     const BYTE* nextStep;
137*5ff13fbcSAllan Jude     const size_t kStepIncr = (1 << (kSearchStrength - 1));
138*5ff13fbcSAllan Jude 
1399cbefe25SConrad Meyer     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
1402b9c00cbSConrad Meyer     ip0 += (ip0 == prefixStart);
141f7cd7fe5SConrad Meyer     {   U32 const curr = (U32)(ip0 - base);
142f7cd7fe5SConrad Meyer         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
143f7cd7fe5SConrad Meyer         U32 const maxRep = curr - windowLow;
144*5ff13fbcSAllan Jude         if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
145*5ff13fbcSAllan Jude         if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
1462b9c00cbSConrad Meyer     }
1472b9c00cbSConrad Meyer 
148*5ff13fbcSAllan Jude     /* start each op */
149*5ff13fbcSAllan Jude _start: /* Requires: ip0 */
15037f1f268SConrad Meyer 
151*5ff13fbcSAllan Jude     step = stepSize;
152*5ff13fbcSAllan Jude     nextStep = ip0 + kStepIncr;
15337f1f268SConrad Meyer 
154*5ff13fbcSAllan Jude     /* calculate positions, ip0 - anchor == 0, so we skip step calc */
155*5ff13fbcSAllan Jude     ip1 = ip0 + 1;
156*5ff13fbcSAllan Jude     ip2 = ip0 + step;
157*5ff13fbcSAllan Jude     ip3 = ip2 + 1;
1582b9c00cbSConrad Meyer 
159*5ff13fbcSAllan Jude     if (ip3 >= ilimit) {
160*5ff13fbcSAllan Jude         goto _cleanup;
161*5ff13fbcSAllan Jude     }
1622b9c00cbSConrad Meyer 
163*5ff13fbcSAllan Jude     hash0 = ZSTD_hashPtr(ip0, hlog, mls);
164*5ff13fbcSAllan Jude     hash1 = ZSTD_hashPtr(ip1, hlog, mls);
165*5ff13fbcSAllan Jude 
166*5ff13fbcSAllan Jude     idx = hashTable[hash0];
167*5ff13fbcSAllan Jude 
168*5ff13fbcSAllan Jude     do {
169*5ff13fbcSAllan Jude         /* load repcode match for ip[2]*/
170*5ff13fbcSAllan Jude         const U32 rval = MEM_read32(ip2 - rep_offset1);
171*5ff13fbcSAllan Jude 
172*5ff13fbcSAllan Jude         /* write back hash table entry */
173*5ff13fbcSAllan Jude         current0 = (U32)(ip0 - base);
174*5ff13fbcSAllan Jude         hashTable[hash0] = current0;
175*5ff13fbcSAllan Jude 
176*5ff13fbcSAllan Jude         /* check repcode at ip[2] */
177*5ff13fbcSAllan Jude         if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
178*5ff13fbcSAllan Jude             ip0 = ip2;
179*5ff13fbcSAllan Jude             match0 = ip0 - rep_offset1;
180*5ff13fbcSAllan Jude             mLength = ip0[-1] == match0[-1];
181*5ff13fbcSAllan Jude             ip0 -= mLength;
182*5ff13fbcSAllan Jude             match0 -= mLength;
183*5ff13fbcSAllan Jude             offcode = STORE_REPCODE_1;
18437f1f268SConrad Meyer             mLength += 4;
1852b9c00cbSConrad Meyer             goto _match;
1862b9c00cbSConrad Meyer         }
187*5ff13fbcSAllan Jude 
188*5ff13fbcSAllan Jude         /* load match for ip[0] */
189*5ff13fbcSAllan Jude         if (idx >= prefixStartIndex) {
190*5ff13fbcSAllan Jude             mval = MEM_read32(base + idx);
191*5ff13fbcSAllan Jude         } else {
192*5ff13fbcSAllan Jude             mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
193*5ff13fbcSAllan Jude         }
194*5ff13fbcSAllan Jude 
195*5ff13fbcSAllan Jude         /* check match at ip[0] */
196*5ff13fbcSAllan Jude         if (MEM_read32(ip0) == mval) {
197*5ff13fbcSAllan Jude             /* found a match! */
1982b9c00cbSConrad Meyer             goto _offset;
1992b9c00cbSConrad Meyer         }
200*5ff13fbcSAllan Jude 
201*5ff13fbcSAllan Jude         /* lookup ip[1] */
202*5ff13fbcSAllan Jude         idx = hashTable[hash1];
203*5ff13fbcSAllan Jude 
204*5ff13fbcSAllan Jude         /* hash ip[2] */
205*5ff13fbcSAllan Jude         hash0 = hash1;
206*5ff13fbcSAllan Jude         hash1 = ZSTD_hashPtr(ip2, hlog, mls);
207*5ff13fbcSAllan Jude 
208*5ff13fbcSAllan Jude         /* advance to next positions */
2092b9c00cbSConrad Meyer         ip0 = ip1;
210*5ff13fbcSAllan Jude         ip1 = ip2;
211*5ff13fbcSAllan Jude         ip2 = ip3;
212*5ff13fbcSAllan Jude 
213*5ff13fbcSAllan Jude         /* write back hash table entry */
214*5ff13fbcSAllan Jude         current0 = (U32)(ip0 - base);
215*5ff13fbcSAllan Jude         hashTable[hash0] = current0;
216*5ff13fbcSAllan Jude 
217*5ff13fbcSAllan Jude         /* load match for ip[0] */
218*5ff13fbcSAllan Jude         if (idx >= prefixStartIndex) {
219*5ff13fbcSAllan Jude             mval = MEM_read32(base + idx);
220*5ff13fbcSAllan Jude         } else {
221*5ff13fbcSAllan Jude             mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
222*5ff13fbcSAllan Jude         }
223*5ff13fbcSAllan Jude 
224*5ff13fbcSAllan Jude         /* check match at ip[0] */
225*5ff13fbcSAllan Jude         if (MEM_read32(ip0) == mval) {
226*5ff13fbcSAllan Jude             /* found a match! */
2272b9c00cbSConrad Meyer             goto _offset;
2282b9c00cbSConrad Meyer         }
229*5ff13fbcSAllan Jude 
230*5ff13fbcSAllan Jude         /* lookup ip[1] */
231*5ff13fbcSAllan Jude         idx = hashTable[hash1];
232*5ff13fbcSAllan Jude 
233*5ff13fbcSAllan Jude         /* hash ip[2] */
234*5ff13fbcSAllan Jude         hash0 = hash1;
235*5ff13fbcSAllan Jude         hash1 = ZSTD_hashPtr(ip2, hlog, mls);
236*5ff13fbcSAllan Jude 
237*5ff13fbcSAllan Jude         /* advance to next positions */
238*5ff13fbcSAllan Jude         ip0 = ip1;
239*5ff13fbcSAllan Jude         ip1 = ip2;
240*5ff13fbcSAllan Jude         ip2 = ip0 + step;
241*5ff13fbcSAllan Jude         ip3 = ip1 + step;
242*5ff13fbcSAllan Jude 
243*5ff13fbcSAllan Jude         /* calculate step */
244*5ff13fbcSAllan Jude         if (ip2 >= nextStep) {
245*5ff13fbcSAllan Jude             step++;
246*5ff13fbcSAllan Jude             PREFETCH_L1(ip1 + 64);
247*5ff13fbcSAllan Jude             PREFETCH_L1(ip1 + 128);
248*5ff13fbcSAllan Jude             nextStep += kStepIncr;
2492b9c00cbSConrad Meyer         }
250*5ff13fbcSAllan Jude     } while (ip3 < ilimit);
251*5ff13fbcSAllan Jude 
252*5ff13fbcSAllan Jude _cleanup:
253*5ff13fbcSAllan Jude     /* Note that there are probably still a couple positions we could search.
254*5ff13fbcSAllan Jude      * However, it seems to be a meaningful performance hit to try to search
255*5ff13fbcSAllan Jude      * them. So let's not. */
256*5ff13fbcSAllan Jude 
257*5ff13fbcSAllan Jude     /* save reps for next block */
258*5ff13fbcSAllan Jude     rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
259*5ff13fbcSAllan Jude     rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
260*5ff13fbcSAllan Jude 
261*5ff13fbcSAllan Jude     /* Return the last literals size */
262*5ff13fbcSAllan Jude     return (size_t)(iend - anchor);
263*5ff13fbcSAllan Jude 
264*5ff13fbcSAllan Jude _offset: /* Requires: ip0, idx */
265*5ff13fbcSAllan Jude 
266*5ff13fbcSAllan Jude     /* Compute the offset code. */
267*5ff13fbcSAllan Jude     match0 = base + idx;
268*5ff13fbcSAllan Jude     rep_offset2 = rep_offset1;
269*5ff13fbcSAllan Jude     rep_offset1 = (U32)(ip0-match0);
270*5ff13fbcSAllan Jude     offcode = STORE_OFFSET(rep_offset1);
27137f1f268SConrad Meyer     mLength = 4;
272*5ff13fbcSAllan Jude 
273*5ff13fbcSAllan Jude     /* Count the backwards match length. */
274*5ff13fbcSAllan Jude     while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
275*5ff13fbcSAllan Jude         ip0--;
276*5ff13fbcSAllan Jude         match0--;
277*5ff13fbcSAllan Jude         mLength++;
278*5ff13fbcSAllan Jude     }
2792b9c00cbSConrad Meyer 
2802b9c00cbSConrad Meyer _match: /* Requires: ip0, match0, offcode */
281*5ff13fbcSAllan Jude 
282*5ff13fbcSAllan Jude     /* Count the forward length. */
28337f1f268SConrad Meyer     mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
284*5ff13fbcSAllan Jude 
285*5ff13fbcSAllan Jude     ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
286*5ff13fbcSAllan Jude 
2872b9c00cbSConrad Meyer     ip0 += mLength;
2882b9c00cbSConrad Meyer     anchor = ip0;
2892b9c00cbSConrad Meyer 
290*5ff13fbcSAllan Jude     /* write next hash table entry */
291*5ff13fbcSAllan Jude     if (ip1 < ip0) {
292*5ff13fbcSAllan Jude         hashTable[hash1] = (U32)(ip1 - base);
293*5ff13fbcSAllan Jude     }
294*5ff13fbcSAllan Jude 
295*5ff13fbcSAllan Jude     /* Fill table and check for immediate repcode. */
2962b9c00cbSConrad Meyer     if (ip0 <= ilimit) {
2972b9c00cbSConrad Meyer         /* Fill Table */
2982b9c00cbSConrad Meyer         assert(base+current0+2 > istart);  /* check base overflow */
2992b9c00cbSConrad Meyer         hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
3002b9c00cbSConrad Meyer         hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
3012b9c00cbSConrad Meyer 
302*5ff13fbcSAllan Jude         if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
303*5ff13fbcSAllan Jude             while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
3042b9c00cbSConrad Meyer                 /* store sequence */
305*5ff13fbcSAllan Jude                 size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
306*5ff13fbcSAllan Jude                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
3072b9c00cbSConrad Meyer                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
3082b9c00cbSConrad Meyer                 ip0 += rLength;
309*5ff13fbcSAllan Jude                 ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
3102b9c00cbSConrad Meyer                 anchor = ip0;
3112b9c00cbSConrad Meyer                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
31237f1f268SConrad Meyer     }   }   }
313*5ff13fbcSAllan Jude 
314*5ff13fbcSAllan Jude     goto _start;
3152b9c00cbSConrad Meyer }
3162b9c00cbSConrad Meyer 
317*5ff13fbcSAllan Jude #define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
318*5ff13fbcSAllan Jude     static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                      \
319*5ff13fbcSAllan Jude             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
320*5ff13fbcSAllan Jude             void const* src, size_t srcSize)                                                       \
321*5ff13fbcSAllan Jude     {                                                                                              \
322*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
3232b9c00cbSConrad Meyer     }
3242b9c00cbSConrad Meyer 
325*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 4, 1)
326*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 5, 1)
327*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 6, 1)
328*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 7, 1)
329*5ff13fbcSAllan Jude 
330*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 4, 0)
331*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 5, 0)
332*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 6, 0)
333*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(noDict, 7, 0)
3342b9c00cbSConrad Meyer 
ZSTD_compressBlock_fast(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)3352b9c00cbSConrad Meyer size_t ZSTD_compressBlock_fast(
3362b9c00cbSConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
3372b9c00cbSConrad Meyer         void const* src, size_t srcSize)
3382b9c00cbSConrad Meyer {
3399cbefe25SConrad Meyer     U32 const mls = ms->cParams.minMatch;
3402b9c00cbSConrad Meyer     assert(ms->dictMatchState == NULL);
341*5ff13fbcSAllan Jude     if (ms->cParams.targetLength > 1) {
3422b9c00cbSConrad Meyer         switch(mls)
3432b9c00cbSConrad Meyer         {
3442b9c00cbSConrad Meyer         default: /* includes case 3 */
3452b9c00cbSConrad Meyer         case 4 :
346*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
3472b9c00cbSConrad Meyer         case 5 :
348*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
3492b9c00cbSConrad Meyer         case 6 :
350*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
3512b9c00cbSConrad Meyer         case 7 :
352*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
353*5ff13fbcSAllan Jude         }
354*5ff13fbcSAllan Jude     } else {
355*5ff13fbcSAllan Jude         switch(mls)
356*5ff13fbcSAllan Jude         {
357*5ff13fbcSAllan Jude         default: /* includes case 3 */
358*5ff13fbcSAllan Jude         case 4 :
359*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
360*5ff13fbcSAllan Jude         case 5 :
361*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
362*5ff13fbcSAllan Jude         case 6 :
363*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
364*5ff13fbcSAllan Jude         case 7 :
365*5ff13fbcSAllan Jude             return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
366*5ff13fbcSAllan Jude         }
367*5ff13fbcSAllan Jude 
3682b9c00cbSConrad Meyer     }
3692b9c00cbSConrad Meyer }
3702b9c00cbSConrad Meyer 
3712b9c00cbSConrad Meyer FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_fast_dictMatchState_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)3722b9c00cbSConrad Meyer size_t ZSTD_compressBlock_fast_dictMatchState_generic(
3732b9c00cbSConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
374*5ff13fbcSAllan Jude         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
3750c16b537SWarner Losh {
3760f743729SConrad Meyer     const ZSTD_compressionParameters* const cParams = &ms->cParams;
37719fcbaf1SConrad Meyer     U32* const hashTable = ms->hashTable;
3780f743729SConrad Meyer     U32 const hlog = cParams->hashLog;
3790f743729SConrad Meyer     /* support stepSize of 0 */
3800f743729SConrad Meyer     U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
38119fcbaf1SConrad Meyer     const BYTE* const base = ms->window.base;
3820c16b537SWarner Losh     const BYTE* const istart = (const BYTE*)src;
3830c16b537SWarner Losh     const BYTE* ip = istart;
3840c16b537SWarner Losh     const BYTE* anchor = istart;
3850f743729SConrad Meyer     const U32   prefixStartIndex = ms->window.dictLimit;
3860f743729SConrad Meyer     const BYTE* const prefixStart = base + prefixStartIndex;
3870c16b537SWarner Losh     const BYTE* const iend = istart + srcSize;
3880c16b537SWarner Losh     const BYTE* const ilimit = iend - HASH_READ_SIZE;
38919fcbaf1SConrad Meyer     U32 offset_1=rep[0], offset_2=rep[1];
3900c16b537SWarner Losh     U32 offsetSaved = 0;
3910c16b537SWarner Losh 
3920f743729SConrad Meyer     const ZSTD_matchState_t* const dms = ms->dictMatchState;
3932b9c00cbSConrad Meyer     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
3942b9c00cbSConrad Meyer     const U32* const dictHashTable = dms->hashTable;
3952b9c00cbSConrad Meyer     const U32 dictStartIndex       = dms->window.dictLimit;
3962b9c00cbSConrad Meyer     const BYTE* const dictBase     = dms->window.base;
3972b9c00cbSConrad Meyer     const BYTE* const dictStart    = dictBase + dictStartIndex;
3982b9c00cbSConrad Meyer     const BYTE* const dictEnd      = dms->window.nextSrc;
3992b9c00cbSConrad Meyer     const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
4000f743729SConrad Meyer     const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
4012b9c00cbSConrad Meyer     const U32 dictHLog             = dictCParams->hashLog;
4020f743729SConrad Meyer 
4034d3f1eafSConrad Meyer     /* if a dictionary is still attached, it necessarily means that
4044d3f1eafSConrad Meyer      * it is within window size. So we just check it. */
4054d3f1eafSConrad Meyer     const U32 maxDistance = 1U << cParams->windowLog;
4064d3f1eafSConrad Meyer     const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
4074d3f1eafSConrad Meyer     assert(endIndex - prefixStartIndex <= maxDistance);
4084d3f1eafSConrad Meyer     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
4094d3f1eafSConrad Meyer 
410*5ff13fbcSAllan Jude     (void)hasStep; /* not currently specialized on whether it's accelerated */
411*5ff13fbcSAllan Jude 
412*5ff13fbcSAllan Jude     /* ensure there will be no underflow
4134d3f1eafSConrad Meyer      * when translating a dict index into a local index */
4142b9c00cbSConrad Meyer     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
4150f743729SConrad Meyer 
4160c16b537SWarner Losh     /* init */
4179cbefe25SConrad Meyer     DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
4180f743729SConrad Meyer     ip += (dictAndPrefixLength == 0);
4190f743729SConrad Meyer     /* dictMatchState repCode checks don't currently handle repCode == 0
4200f743729SConrad Meyer      * disabling. */
4210f743729SConrad Meyer     assert(offset_1 <= dictAndPrefixLength);
4220f743729SConrad Meyer     assert(offset_2 <= dictAndPrefixLength);
4230c16b537SWarner Losh 
4240c16b537SWarner Losh     /* Main Search Loop */
4250c16b537SWarner Losh     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
4260c16b537SWarner Losh         size_t mLength;
42719fcbaf1SConrad Meyer         size_t const h = ZSTD_hashPtr(ip, hlog, mls);
428f7cd7fe5SConrad Meyer         U32 const curr = (U32)(ip-base);
4290c16b537SWarner Losh         U32 const matchIndex = hashTable[h];
4300c16b537SWarner Losh         const BYTE* match = base + matchIndex;
431f7cd7fe5SConrad Meyer         const U32 repIndex = curr + 1 - offset_1;
4322b9c00cbSConrad Meyer         const BYTE* repMatch = (repIndex < prefixStartIndex) ?
4330f743729SConrad Meyer                                dictBase + (repIndex - dictIndexDelta) :
4340f743729SConrad Meyer                                base + repIndex;
435f7cd7fe5SConrad Meyer         hashTable[h] = curr;   /* update hash table */
4360c16b537SWarner Losh 
4372b9c00cbSConrad Meyer         if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
4380f743729SConrad Meyer           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
4390f743729SConrad Meyer             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
4400f743729SConrad Meyer             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
4410f743729SConrad Meyer             ip++;
442*5ff13fbcSAllan Jude             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
4430f743729SConrad Meyer         } else if ( (matchIndex <= prefixStartIndex) ) {
4440f743729SConrad Meyer             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
4450f743729SConrad Meyer             U32 const dictMatchIndex = dictHashTable[dictHash];
4460f743729SConrad Meyer             const BYTE* dictMatch = dictBase + dictMatchIndex;
4470f743729SConrad Meyer             if (dictMatchIndex <= dictStartIndex ||
4480f743729SConrad Meyer                 MEM_read32(dictMatch) != MEM_read32(ip)) {
4490f743729SConrad Meyer                 assert(stepSize >= 1);
4500f743729SConrad Meyer                 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
4510f743729SConrad Meyer                 continue;
4520c16b537SWarner Losh             } else {
4530f743729SConrad Meyer                 /* found a dict match */
454f7cd7fe5SConrad Meyer                 U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
4550f743729SConrad Meyer                 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
4560f743729SConrad Meyer                 while (((ip>anchor) & (dictMatch>dictStart))
4570f743729SConrad Meyer                      && (ip[-1] == dictMatch[-1])) {
4580f743729SConrad Meyer                     ip--; dictMatch--; mLength++;
4590f743729SConrad Meyer                 } /* catch up */
4600f743729SConrad Meyer                 offset_2 = offset_1;
4610f743729SConrad Meyer                 offset_1 = offset;
462*5ff13fbcSAllan Jude                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
4630f743729SConrad Meyer             }
4640f743729SConrad Meyer         } else if (MEM_read32(match) != MEM_read32(ip)) {
4650f743729SConrad Meyer             /* it's not a match, and we're not going to check the dictionary */
4660f743729SConrad Meyer             assert(stepSize >= 1);
4670f743729SConrad Meyer             ip += ((ip-anchor) >> kSearchStrength) + stepSize;
4680f743729SConrad Meyer             continue;
4690f743729SConrad Meyer         } else {
4700f743729SConrad Meyer             /* found a regular match */
4710f743729SConrad Meyer             U32 const offset = (U32)(ip-match);
4720c16b537SWarner Losh             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
4730f743729SConrad Meyer             while (((ip>anchor) & (match>prefixStart))
4740f743729SConrad Meyer                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
4750c16b537SWarner Losh             offset_2 = offset_1;
4760c16b537SWarner Losh             offset_1 = offset;
477*5ff13fbcSAllan Jude             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
4780f743729SConrad Meyer         }
4790c16b537SWarner Losh 
4800c16b537SWarner Losh         /* match found */
4810c16b537SWarner Losh         ip += mLength;
4820c16b537SWarner Losh         anchor = ip;
4830c16b537SWarner Losh 
4840c16b537SWarner Losh         if (ip <= ilimit) {
4850c16b537SWarner Losh             /* Fill Table */
486f7cd7fe5SConrad Meyer             assert(base+curr+2 > istart);  /* check base overflow */
487f7cd7fe5SConrad Meyer             hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
48819fcbaf1SConrad Meyer             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
4890f743729SConrad Meyer 
4900c16b537SWarner Losh             /* check immediate repcode */
4910f743729SConrad Meyer             while (ip <= ilimit) {
4920f743729SConrad Meyer                 U32 const current2 = (U32)(ip-base);
4930f743729SConrad Meyer                 U32 const repIndex2 = current2 - offset_2;
4940f743729SConrad Meyer                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
4950f743729SConrad Meyer                         dictBase - dictIndexDelta + repIndex2 :
4960f743729SConrad Meyer                         base + repIndex2;
4970f743729SConrad Meyer                 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
4980f743729SConrad Meyer                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
4990f743729SConrad Meyer                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
5000f743729SConrad Meyer                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
5010f743729SConrad Meyer                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
502*5ff13fbcSAllan Jude                     ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
5030f743729SConrad Meyer                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
5040f743729SConrad Meyer                     ip += repLength2;
5050f743729SConrad Meyer                     anchor = ip;
5060f743729SConrad Meyer                     continue;
5070f743729SConrad Meyer                 }
5080f743729SConrad Meyer                 break;
5090f743729SConrad Meyer             }
5100f743729SConrad Meyer         }
5112b9c00cbSConrad Meyer     }
5120c16b537SWarner Losh 
5130c16b537SWarner Losh     /* save reps for next block */
51419fcbaf1SConrad Meyer     rep[0] = offset_1 ? offset_1 : offsetSaved;
51519fcbaf1SConrad Meyer     rep[1] = offset_2 ? offset_2 : offsetSaved;
5160c16b537SWarner Losh 
5170c16b537SWarner Losh     /* Return the last literals size */
5184d3f1eafSConrad Meyer     return (size_t)(iend - anchor);
5190c16b537SWarner Losh }
5200c16b537SWarner Losh 
521*5ff13fbcSAllan Jude 
522*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
523*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
524*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
525*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
526*5ff13fbcSAllan Jude 
ZSTD_compressBlock_fast_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)5270f743729SConrad Meyer size_t ZSTD_compressBlock_fast_dictMatchState(
5280f743729SConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
5290f743729SConrad Meyer         void const* src, size_t srcSize)
5300f743729SConrad Meyer {
5319cbefe25SConrad Meyer     U32 const mls = ms->cParams.minMatch;
5320f743729SConrad Meyer     assert(ms->dictMatchState != NULL);
5330f743729SConrad Meyer     switch(mls)
5340f743729SConrad Meyer     {
5350f743729SConrad Meyer     default: /* includes case 3 */
5360f743729SConrad Meyer     case 4 :
537*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
5380f743729SConrad Meyer     case 5 :
539*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
5400f743729SConrad Meyer     case 6 :
541*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
5420f743729SConrad Meyer     case 7 :
543*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
5440c16b537SWarner Losh     }
5450c16b537SWarner Losh }
5460c16b537SWarner Losh 
5470c16b537SWarner Losh 
ZSTD_compressBlock_fast_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize,U32 const mls,U32 const hasStep)54819fcbaf1SConrad Meyer static size_t ZSTD_compressBlock_fast_extDict_generic(
54919fcbaf1SConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
550*5ff13fbcSAllan Jude         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
5510c16b537SWarner Losh {
5520f743729SConrad Meyer     const ZSTD_compressionParameters* const cParams = &ms->cParams;
5530f743729SConrad Meyer     U32* const hashTable = ms->hashTable;
5540f743729SConrad Meyer     U32 const hlog = cParams->hashLog;
5550f743729SConrad Meyer     /* support stepSize of 0 */
5560f743729SConrad Meyer     U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
55719fcbaf1SConrad Meyer     const BYTE* const base = ms->window.base;
55819fcbaf1SConrad Meyer     const BYTE* const dictBase = ms->window.dictBase;
5590c16b537SWarner Losh     const BYTE* const istart = (const BYTE*)src;
5600c16b537SWarner Losh     const BYTE* ip = istart;
5610c16b537SWarner Losh     const BYTE* anchor = istart;
5624d3f1eafSConrad Meyer     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
5639cbefe25SConrad Meyer     const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
5644d3f1eafSConrad Meyer     const U32   dictStartIndex = lowLimit;
5650f743729SConrad Meyer     const BYTE* const dictStart = dictBase + dictStartIndex;
5664d3f1eafSConrad Meyer     const U32   dictLimit = ms->window.dictLimit;
5674d3f1eafSConrad Meyer     const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
5680f743729SConrad Meyer     const BYTE* const prefixStart = base + prefixStartIndex;
5690f743729SConrad Meyer     const BYTE* const dictEnd = dictBase + prefixStartIndex;
5700c16b537SWarner Losh     const BYTE* const iend = istart + srcSize;
5710c16b537SWarner Losh     const BYTE* const ilimit = iend - 8;
57219fcbaf1SConrad Meyer     U32 offset_1=rep[0], offset_2=rep[1];
5730c16b537SWarner Losh 
574*5ff13fbcSAllan Jude     (void)hasStep; /* not currently specialized on whether it's accelerated */
575*5ff13fbcSAllan Jude 
57637f1f268SConrad Meyer     DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
5779cbefe25SConrad Meyer 
5784d3f1eafSConrad Meyer     /* switch to "regular" variant if extDict is invalidated due to maxDistance */
5794d3f1eafSConrad Meyer     if (prefixStartIndex == dictStartIndex)
580*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
5814d3f1eafSConrad Meyer 
5820c16b537SWarner Losh     /* Search Loop */
5830c16b537SWarner Losh     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
58419fcbaf1SConrad Meyer         const size_t h = ZSTD_hashPtr(ip, hlog, mls);
5850c16b537SWarner Losh         const U32    matchIndex = hashTable[h];
5860f743729SConrad Meyer         const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
5870c16b537SWarner Losh         const BYTE*  match = matchBase + matchIndex;
588f7cd7fe5SConrad Meyer         const U32    curr = (U32)(ip-base);
589f7cd7fe5SConrad Meyer         const U32    repIndex = curr + 1 - offset_1;
5900f743729SConrad Meyer         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
5910f743729SConrad Meyer         const BYTE* const repMatch = repBase + repIndex;
592f7cd7fe5SConrad Meyer         hashTable[h] = curr;   /* update hash table */
593f7cd7fe5SConrad Meyer         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
5940c16b537SWarner Losh 
595*5ff13fbcSAllan Jude         if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
596*5ff13fbcSAllan Jude              & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
5970c16b537SWarner Losh            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
5989cbefe25SConrad Meyer             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
5999cbefe25SConrad Meyer             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
6000c16b537SWarner Losh             ip++;
601*5ff13fbcSAllan Jude             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
6029cbefe25SConrad Meyer             ip += rLength;
6039cbefe25SConrad Meyer             anchor = ip;
6040c16b537SWarner Losh         } else {
6050f743729SConrad Meyer             if ( (matchIndex < dictStartIndex) ||
6060c16b537SWarner Losh                  (MEM_read32(match) != MEM_read32(ip)) ) {
60719fcbaf1SConrad Meyer                 assert(stepSize >= 1);
60819fcbaf1SConrad Meyer                 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
6090c16b537SWarner Losh                 continue;
6100c16b537SWarner Losh             }
6119cbefe25SConrad Meyer             {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
6129cbefe25SConrad Meyer                 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
613f7cd7fe5SConrad Meyer                 U32 const offset = curr - matchIndex;
6149cbefe25SConrad Meyer                 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
6150c16b537SWarner Losh                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
6169cbefe25SConrad Meyer                 offset_2 = offset_1; offset_1 = offset;  /* update offset history */
617*5ff13fbcSAllan Jude                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
6180c16b537SWarner Losh                 ip += mLength;
6190c16b537SWarner Losh                 anchor = ip;
6209cbefe25SConrad Meyer         }   }
6210c16b537SWarner Losh 
6220c16b537SWarner Losh         if (ip <= ilimit) {
6230c16b537SWarner Losh             /* Fill Table */
624f7cd7fe5SConrad Meyer             hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
62519fcbaf1SConrad Meyer             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
6260c16b537SWarner Losh             /* check immediate repcode */
6270c16b537SWarner Losh             while (ip <= ilimit) {
6280c16b537SWarner Losh                 U32 const current2 = (U32)(ip-base);
6290c16b537SWarner Losh                 U32 const repIndex2 = current2 - offset_2;
6309cbefe25SConrad Meyer                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
631*5ff13fbcSAllan Jude                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
6320c16b537SWarner Losh                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
6330f743729SConrad Meyer                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
6340f743729SConrad Meyer                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
6359cbefe25SConrad Meyer                     { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
636*5ff13fbcSAllan Jude                     ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
63719fcbaf1SConrad Meyer                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
6380c16b537SWarner Losh                     ip += repLength2;
6390c16b537SWarner Losh                     anchor = ip;
6400c16b537SWarner Losh                     continue;
6410c16b537SWarner Losh                 }
6420c16b537SWarner Losh                 break;
6430c16b537SWarner Losh     }   }   }
6440c16b537SWarner Losh 
6450c16b537SWarner Losh     /* save reps for next block */
64619fcbaf1SConrad Meyer     rep[0] = offset_1;
64719fcbaf1SConrad Meyer     rep[1] = offset_2;
6480c16b537SWarner Losh 
6490c16b537SWarner Losh     /* Return the last literals size */
6504d3f1eafSConrad Meyer     return (size_t)(iend - anchor);
6510c16b537SWarner Losh }
6520c16b537SWarner Losh 
653*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 4, 0)
654*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 5, 0)
655*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 6, 0)
656*5ff13fbcSAllan Jude ZSTD_GEN_FAST_FN(extDict, 7, 0)
6570c16b537SWarner Losh 
ZSTD_compressBlock_fast_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)65819fcbaf1SConrad Meyer size_t ZSTD_compressBlock_fast_extDict(
65919fcbaf1SConrad Meyer         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
6600f743729SConrad Meyer         void const* src, size_t srcSize)
6610c16b537SWarner Losh {
6629cbefe25SConrad Meyer     U32 const mls = ms->cParams.minMatch;
6630c16b537SWarner Losh     switch(mls)
6640c16b537SWarner Losh     {
6650c16b537SWarner Losh     default: /* includes case 3 */
6660c16b537SWarner Losh     case 4 :
667*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
6680c16b537SWarner Losh     case 5 :
669*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
6700c16b537SWarner Losh     case 6 :
671*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
6720c16b537SWarner Losh     case 7 :
673*5ff13fbcSAllan Jude         return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
6740c16b537SWarner Losh     }
6750c16b537SWarner Losh }
676