xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_ldm.c (revision d3d381b2b194b4d24853e92eecef55f262688d1a)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  */
9 
10 #include "zstd_ldm.h"
11 
12 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
13 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
14 
15 #define LDM_BUCKET_SIZE_LOG 3
16 #define LDM_MIN_MATCH_LENGTH 64
17 #define LDM_HASH_RLOG 7
18 #define LDM_HASH_CHAR_OFFSET 10
19 
20 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
21                                ZSTD_compressionParameters const* cParams)
22 {
23     U32 const windowLog = cParams->windowLog;
24     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
25     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
26     if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
27     if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
28     if (cParams->strategy >= ZSTD_btopt) {
29       /* Get out of the way of the optimal parser */
30       U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
31       assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
32       assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
33       params->minMatchLength = minMatch;
34     }
35     if (params->hashLog == 0) {
36         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
37         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38     }
39     if (params->hashEveryLog == 0) {
40         params->hashEveryLog =
41                 windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
42     }
43     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
44 }
45 
46 size_t ZSTD_ldm_getTableSize(ldmParams_t params)
47 {
48     size_t const ldmHSize = ((size_t)1) << params.hashLog;
49     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
50     size_t const ldmBucketSize =
51         ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
52     size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
53     return params.enableLdm ? totalSize : 0;
54 }
55 
56 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
57 {
58     return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
59 }
60 
61 /** ZSTD_ldm_getSmallHash() :
62  *  numBits should be <= 32
63  *  If numBits==0, returns 0.
64  *  @return : the most significant numBits of value. */
65 static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
66 {
67     assert(numBits <= 32);
68     return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
69 }
70 
71 /** ZSTD_ldm_getChecksum() :
72  *  numBitsToDiscard should be <= 32
73  *  @return : the next most significant 32 bits after numBitsToDiscard */
74 static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
75 {
76     assert(numBitsToDiscard <= 32);
77     return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
78 }
79 
80 /** ZSTD_ldm_getTag() ;
81  *  Given the hash, returns the most significant numTagBits bits
82  *  after (32 + hbits) bits.
83  *
84  *  If there are not enough bits remaining, return the last
85  *  numTagBits bits. */
86 static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
87 {
88     assert(numTagBits < 32 && hbits <= 32);
89     if (32 - hbits < numTagBits) {
90         return hash & (((U32)1 << numTagBits) - 1);
91     } else {
92         return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
93     }
94 }
95 
96 /** ZSTD_ldm_getBucket() :
97  *  Returns a pointer to the start of the bucket associated with hash. */
98 static ldmEntry_t* ZSTD_ldm_getBucket(
99         ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
100 {
101     return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
102 }
103 
104 /** ZSTD_ldm_insertEntry() :
105  *  Insert the entry with corresponding hash into the hash table */
106 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
107                                  size_t const hash, const ldmEntry_t entry,
108                                  ldmParams_t const ldmParams)
109 {
110     BYTE* const bucketOffsets = ldmState->bucketOffsets;
111     *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
112     bucketOffsets[hash]++;
113     bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
114 }
115 
116 /** ZSTD_ldm_makeEntryAndInsertByTag() :
117  *
118  *  Gets the small hash, checksum, and tag from the rollingHash.
119  *
120  *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
121  *  creates an ldmEntry from the offset, and inserts it into the hash table.
122  *
123  *  hBits is the length of the small hash, which is the most significant hBits
124  *  of rollingHash. The checksum is the next 32 most significant bits, followed
125  *  by ldmParams.hashEveryLog bits that make up the tag. */
126 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
127                                              U64 const rollingHash,
128                                              U32 const hBits,
129                                              U32 const offset,
130                                              ldmParams_t const ldmParams)
131 {
132     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
133     U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
134     if (tag == tagMask) {
135         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
136         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
137         ldmEntry_t entry;
138         entry.offset = offset;
139         entry.checksum = checksum;
140         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
141     }
142 }
143 
144 /** ZSTD_ldm_getRollingHash() :
145  *  Get a 64-bit hash using the first len bytes from buf.
146  *
147  *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
148  *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
149  *
150  *  where the constant a is defined to be prime8bytes.
151  *
152  *  The implementation adds an offset to each byte, so
153  *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
154 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
155 {
156     U64 ret = 0;
157     U32 i;
158     for (i = 0; i < len; i++) {
159         ret *= prime8bytes;
160         ret += buf[i] + LDM_HASH_CHAR_OFFSET;
161     }
162     return ret;
163 }
164 
165 /** ZSTD_ldm_ipow() :
166  *  Return base^exp. */
167 static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
168 {
169     U64 ret = 1;
170     while (exp) {
171         if (exp & 1) { ret *= base; }
172         exp >>= 1;
173         base *= base;
174     }
175     return ret;
176 }
177 
178 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
179     DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
180     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
181     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
182 }
183 
184 /** ZSTD_ldm_updateHash() :
185  *  Updates hash by removing toRemove and adding toAdd. */
186 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
187 {
188     hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
189     hash *= prime8bytes;
190     hash += toAdd + LDM_HASH_CHAR_OFFSET;
191     return hash;
192 }
193 
194 /** ZSTD_ldm_countBackwardsMatch() :
195  *  Returns the number of bytes that match backwards before pIn and pMatch.
196  *
197  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
198 static size_t ZSTD_ldm_countBackwardsMatch(
199             const BYTE* pIn, const BYTE* pAnchor,
200             const BYTE* pMatch, const BYTE* pBase)
201 {
202     size_t matchLength = 0;
203     while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
204         pIn--;
205         pMatch--;
206         matchLength++;
207     }
208     return matchLength;
209 }
210 
211 /** ZSTD_ldm_fillFastTables() :
212  *
213  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
214  *  This is similar to ZSTD_loadDictionaryContent.
215  *
216  *  The tables for the other strategies are filled within their
217  *  block compressors. */
218 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
219                                       ZSTD_compressionParameters const* cParams,
220                                       void const* end)
221 {
222     const BYTE* const iend = (const BYTE*)end;
223 
224     switch(cParams->strategy)
225     {
226     case ZSTD_fast:
227         ZSTD_fillHashTable(ms, cParams, iend);
228         ms->nextToUpdate = (U32)(iend - ms->window.base);
229         break;
230 
231     case ZSTD_dfast:
232         ZSTD_fillDoubleHashTable(ms, cParams, iend);
233         ms->nextToUpdate = (U32)(iend - ms->window.base);
234         break;
235 
236     case ZSTD_greedy:
237     case ZSTD_lazy:
238     case ZSTD_lazy2:
239     case ZSTD_btlazy2:
240     case ZSTD_btopt:
241     case ZSTD_btultra:
242         break;
243     default:
244         assert(0);  /* not possible : not a valid strategy id */
245     }
246 
247     return 0;
248 }
249 
250 /** ZSTD_ldm_fillLdmHashTable() :
251  *
252  *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
253  *  lastHash is the rolling hash that corresponds to lastHashed.
254  *
255  *  Returns the rolling hash corresponding to position iend-1. */
256 static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
257                                      U64 lastHash, const BYTE* lastHashed,
258                                      const BYTE* iend, const BYTE* base,
259                                      U32 hBits, ldmParams_t const ldmParams)
260 {
261     U64 rollingHash = lastHash;
262     const BYTE* cur = lastHashed + 1;
263 
264     while (cur < iend) {
265         rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
266                                           cur[ldmParams.minMatchLength-1],
267                                           state->hashPower);
268         ZSTD_ldm_makeEntryAndInsertByTag(state,
269                                          rollingHash, hBits,
270                                          (U32)(cur - base), ldmParams);
271         ++cur;
272     }
273     return rollingHash;
274 }
275 
276 
277 /** ZSTD_ldm_limitTableUpdate() :
278  *
279  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
280  *  if it is far way
281  *  (after a long match, only update tables a limited amount). */
282 static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
283 {
284     U32 const current = (U32)(anchor - ms->window.base);
285     if (current > ms->nextToUpdate + 1024) {
286         ms->nextToUpdate =
287             current - MIN(512, current - ms->nextToUpdate - 1024);
288     }
289 }
290 
291 static size_t ZSTD_ldm_generateSequences_internal(
292         ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
293         ldmParams_t const* params, void const* src, size_t srcSize)
294 {
295     /* LDM parameters */
296     int const extDict = ZSTD_window_hasExtDict(ldmState->window);
297     U32 const minMatchLength = params->minMatchLength;
298     U64 const hashPower = ldmState->hashPower;
299     U32 const hBits = params->hashLog - params->bucketSizeLog;
300     U32 const ldmBucketSize = 1U << params->bucketSizeLog;
301     U32 const hashEveryLog = params->hashEveryLog;
302     U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
303     /* Prefix and extDict parameters */
304     U32 const dictLimit = ldmState->window.dictLimit;
305     U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
306     BYTE const* const base = ldmState->window.base;
307     BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
308     BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
309     BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
310     BYTE const* const lowPrefixPtr = base + dictLimit;
311     /* Input bounds */
312     BYTE const* const istart = (BYTE const*)src;
313     BYTE const* const iend = istart + srcSize;
314     BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
315     /* Input positions */
316     BYTE const* anchor = istart;
317     BYTE const* ip = istart;
318     /* Rolling hash */
319     BYTE const* lastHashed = NULL;
320     U64 rollingHash = 0;
321 
322     while (ip <= ilimit) {
323         size_t mLength;
324         U32 const current = (U32)(ip - base);
325         size_t forwardMatchLength = 0, backwardMatchLength = 0;
326         ldmEntry_t* bestEntry = NULL;
327         if (ip != istart) {
328             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
329                                               lastHashed[minMatchLength],
330                                               hashPower);
331         } else {
332             rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
333         }
334         lastHashed = ip;
335 
336         /* Do not insert and do not look for a match */
337         if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
338            ip++;
339            continue;
340         }
341 
342         /* Get the best entry and compute the match lengths */
343         {
344             ldmEntry_t* const bucket =
345                 ZSTD_ldm_getBucket(ldmState,
346                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
347                                    *params);
348             ldmEntry_t* cur;
349             size_t bestMatchLength = 0;
350             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
351 
352             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
353                 size_t curForwardMatchLength, curBackwardMatchLength,
354                        curTotalMatchLength;
355                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
356                     continue;
357                 }
358                 if (extDict) {
359                     BYTE const* const curMatchBase =
360                         cur->offset < dictLimit ? dictBase : base;
361                     BYTE const* const pMatch = curMatchBase + cur->offset;
362                     BYTE const* const matchEnd =
363                         cur->offset < dictLimit ? dictEnd : iend;
364                     BYTE const* const lowMatchPtr =
365                         cur->offset < dictLimit ? dictStart : lowPrefixPtr;
366 
367                     curForwardMatchLength = ZSTD_count_2segments(
368                                                 ip, pMatch, iend,
369                                                 matchEnd, lowPrefixPtr);
370                     if (curForwardMatchLength < minMatchLength) {
371                         continue;
372                     }
373                     curBackwardMatchLength =
374                         ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
375                                                      lowMatchPtr);
376                     curTotalMatchLength = curForwardMatchLength +
377                                           curBackwardMatchLength;
378                 } else { /* !extDict */
379                     BYTE const* const pMatch = base + cur->offset;
380                     curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
381                     if (curForwardMatchLength < minMatchLength) {
382                         continue;
383                     }
384                     curBackwardMatchLength =
385                         ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
386                                                      lowPrefixPtr);
387                     curTotalMatchLength = curForwardMatchLength +
388                                           curBackwardMatchLength;
389                 }
390 
391                 if (curTotalMatchLength > bestMatchLength) {
392                     bestMatchLength = curTotalMatchLength;
393                     forwardMatchLength = curForwardMatchLength;
394                     backwardMatchLength = curBackwardMatchLength;
395                     bestEntry = cur;
396                 }
397             }
398         }
399 
400         /* No match found -- continue searching */
401         if (bestEntry == NULL) {
402             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
403                                              hBits, current,
404                                              *params);
405             ip++;
406             continue;
407         }
408 
409         /* Match found */
410         mLength = forwardMatchLength + backwardMatchLength;
411         ip -= backwardMatchLength;
412 
413         {
414             /* Store the sequence:
415              * ip = current - backwardMatchLength
416              * The match is at (bestEntry->offset - backwardMatchLength)
417              */
418             U32 const matchIndex = bestEntry->offset;
419             U32 const offset = current - matchIndex;
420             rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
421 
422             /* Out of sequence storage */
423             if (rawSeqStore->size == rawSeqStore->capacity)
424                 return ERROR(dstSize_tooSmall);
425             seq->litLength = (U32)(ip - anchor);
426             seq->matchLength = (U32)mLength;
427             seq->offset = offset;
428             rawSeqStore->size++;
429         }
430 
431         /* Insert the current entry into the hash table */
432         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
433                                          (U32)(lastHashed - base),
434                                          *params);
435 
436         assert(ip + backwardMatchLength == lastHashed);
437 
438         /* Fill the hash table from lastHashed+1 to ip+mLength*/
439         /* Heuristic: don't need to fill the entire table at end of block */
440         if (ip + mLength <= ilimit) {
441             rollingHash = ZSTD_ldm_fillLdmHashTable(
442                               ldmState, rollingHash, lastHashed,
443                               ip + mLength, base, hBits, *params);
444             lastHashed = ip + mLength - 1;
445         }
446         ip += mLength;
447         anchor = ip;
448     }
449     return iend - anchor;
450 }
451 
452 /*! ZSTD_ldm_reduceTable() :
453  *  reduce table indexes by `reducerValue` */
454 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
455                                  U32 const reducerValue)
456 {
457     U32 u;
458     for (u = 0; u < size; u++) {
459         if (table[u].offset < reducerValue) table[u].offset = 0;
460         else table[u].offset -= reducerValue;
461     }
462 }
463 
464 size_t ZSTD_ldm_generateSequences(
465         ldmState_t* ldmState, rawSeqStore_t* sequences,
466         ldmParams_t const* params, void const* src, size_t srcSize)
467 {
468     U32 const maxDist = 1U << params->windowLog;
469     BYTE const* const istart = (BYTE const*)src;
470     BYTE const* const iend = istart + srcSize;
471     size_t const kMaxChunkSize = 1 << 20;
472     size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
473     size_t chunk;
474     size_t leftoverSize = 0;
475 
476     assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
477     /* Check that ZSTD_window_update() has been called for this chunk prior
478      * to passing it to this function.
479      */
480     assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
481     /* The input could be very large (in zstdmt), so it must be broken up into
482      * chunks to enforce the maximmum distance and handle overflow correction.
483      */
484     assert(sequences->pos <= sequences->size);
485     assert(sequences->size <= sequences->capacity);
486     for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
487         BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
488         size_t const remaining = (size_t)(iend - chunkStart);
489         BYTE const *const chunkEnd =
490             (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
491         size_t const chunkSize = chunkEnd - chunkStart;
492         size_t newLeftoverSize;
493         size_t const prevSize = sequences->size;
494 
495         assert(chunkStart < iend);
496         /* 1. Perform overflow correction if necessary. */
497         if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
498             U32 const ldmHSize = 1U << params->hashLog;
499             U32 const correction = ZSTD_window_correctOverflow(
500                 &ldmState->window, /* cycleLog */ 0, maxDist, src);
501             ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
502         }
503         /* 2. We enforce the maximum offset allowed.
504          *
505          * kMaxChunkSize should be small enough that we don't lose too much of
506          * the window through early invalidation.
507          * TODO: * Test the chunk size.
508          *       * Try invalidation after the sequence generation and test the
509          *         the offset against maxDist directly.
510          */
511         ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
512         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
513         newLeftoverSize = ZSTD_ldm_generateSequences_internal(
514             ldmState, sequences, params, chunkStart, chunkSize);
515         if (ZSTD_isError(newLeftoverSize))
516             return newLeftoverSize;
517         /* 4. We add the leftover literals from previous iterations to the first
518          *    newly generated sequence, or add the `newLeftoverSize` if none are
519          *    generated.
520          */
521         /* Prepend the leftover literals from the last call */
522         if (prevSize < sequences->size) {
523             sequences->seq[prevSize].litLength += (U32)leftoverSize;
524             leftoverSize = newLeftoverSize;
525         } else {
526             assert(newLeftoverSize == chunkSize);
527             leftoverSize += chunkSize;
528         }
529     }
530     return 0;
531 }
532 
533 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
534     while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
535         rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
536         if (srcSize <= seq->litLength) {
537             /* Skip past srcSize literals */
538             seq->litLength -= (U32)srcSize;
539             return;
540         }
541         srcSize -= seq->litLength;
542         seq->litLength = 0;
543         if (srcSize < seq->matchLength) {
544             /* Skip past the first srcSize of the match */
545             seq->matchLength -= (U32)srcSize;
546             if (seq->matchLength < minMatch) {
547                 /* The match is too short, omit it */
548                 if (rawSeqStore->pos + 1 < rawSeqStore->size) {
549                     seq[1].litLength += seq[0].matchLength;
550                 }
551                 rawSeqStore->pos++;
552             }
553             return;
554         }
555         srcSize -= seq->matchLength;
556         seq->matchLength = 0;
557         rawSeqStore->pos++;
558     }
559 }
560 
561 /**
562  * If the sequence length is longer than remaining then the sequence is split
563  * between this block and the next.
564  *
565  * Returns the current sequence to handle, or if the rest of the block should
566  * be literals, it returns a sequence with offset == 0.
567  */
568 static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
569                                  U32 const remaining, U32 const minMatch)
570 {
571     rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
572     assert(sequence.offset > 0);
573     /* Likely: No partial sequence */
574     if (remaining >= sequence.litLength + sequence.matchLength) {
575         rawSeqStore->pos++;
576         return sequence;
577     }
578     /* Cut the sequence short (offset == 0 ==> rest is literals). */
579     if (remaining <= sequence.litLength) {
580         sequence.offset = 0;
581     } else if (remaining < sequence.litLength + sequence.matchLength) {
582         sequence.matchLength = remaining - sequence.litLength;
583         if (sequence.matchLength < minMatch) {
584             sequence.offset = 0;
585         }
586     }
587     /* Skip past `remaining` bytes for the future sequences. */
588     ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
589     return sequence;
590 }
591 
592 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
593     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
594     ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
595     int const extDict)
596 {
597     unsigned const minMatch = cParams->searchLength;
598     ZSTD_blockCompressor const blockCompressor =
599         ZSTD_selectBlockCompressor(cParams->strategy, extDict);
600     BYTE const* const base = ms->window.base;
601     /* Input bounds */
602     BYTE const* const istart = (BYTE const*)src;
603     BYTE const* const iend = istart + srcSize;
604     /* Input positions */
605     BYTE const* ip = istart;
606 
607     assert(rawSeqStore->pos <= rawSeqStore->size);
608     assert(rawSeqStore->size <= rawSeqStore->capacity);
609     /* Loop through each sequence and apply the block compressor to the lits */
610     while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
611         /* maybeSplitSequence updates rawSeqStore->pos */
612         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
613                                                    (U32)(iend - ip), minMatch);
614         int i;
615         /* End signal */
616         if (sequence.offset == 0)
617             break;
618 
619         assert(sequence.offset <= (1U << cParams->windowLog));
620         assert(ip + sequence.litLength + sequence.matchLength <= iend);
621 
622         /* Fill tables for block compressor */
623         ZSTD_ldm_limitTableUpdate(ms, ip);
624         ZSTD_ldm_fillFastTables(ms, cParams, ip);
625         /* Run the block compressor */
626         {
627             size_t const newLitLength =
628                 blockCompressor(ms, seqStore, rep, cParams, ip,
629                                 sequence.litLength);
630             ip += sequence.litLength;
631             ms->nextToUpdate = (U32)(ip - base);
632             /* Update the repcodes */
633             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
634                 rep[i] = rep[i-1];
635             rep[0] = sequence.offset;
636             /* Store the sequence */
637             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
638                           sequence.offset + ZSTD_REP_MOVE,
639                           sequence.matchLength - MINMATCH);
640             ip += sequence.matchLength;
641         }
642     }
643     /* Fill the tables for the block compressor */
644     ZSTD_ldm_limitTableUpdate(ms, ip);
645     ZSTD_ldm_fillFastTables(ms, cParams, ip);
646     /* Compress the last literals */
647     {
648         size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
649                                                     ip, iend - ip);
650         ms->nextToUpdate = (U32)(iend - base);
651         return lastLiterals;
652     }
653 }
654