// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_ldm.h"

#include "../common/debug.h"
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (cParams->strategy >= ZSTD_btopt) {
        /* Get out of the way of the optimal parser */
        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
        params->minMatchLength = minMatch;
    }
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        params->hashRateLog = params->windowLog < params->hashLog
                                   ? 0
                                   : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
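
/* Example: starting from an all-zero ldmParams_t with windowLog == 27 (and a
 * non-btopt strategy), the defaults above resolve to
 * hashLog = 27 - LDM_HASH_RLOG = 20, hashRateLog = 27 - 20 = 7,
 * bucketSizeLog = LDM_BUCKET_SIZE_LOG = 3, and minMatchLength = 64. */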

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm ? totalSize : 0;
}
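
/* Example (approximate arithmetic): with hashLog == 20 and bucketSizeLog == 3,
 * the bucket-offset array needs 2^(20-3) = 128 KB and the hash table needs
 * 2^20 * sizeof(ldmEntry_t) bytes -- 8 MB when ldmEntry_t is its two U32
 * fields with no padding -- each rounded up by ZSTD_cwksp_alloc_size(). */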

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}

/** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}

/** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}

/** ZSTD_ldm_getTag() :
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
}
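
/* Worked example: the 64-bit rolling hash is consumed from most to least
 * significant bit as [ hbits small hash | 32-bit checksum | tag | unused ].
 * With hbits = 20 and numTagBits = 7, 64 - 20 - 32 = 12 bits remain after the
 * checksum, so the tag is (hash >> (32 - 20 - 7)) & 0x7F. With hbits = 28 only
 * 4 bits remain, fewer than 7, so the low 7 bits (hash & 0x7F) are used. */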

/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}
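
/* Example: with bucketSizeLog == 3 each bucket is a circular buffer of 8
 * ldmEntry_t slots; bucketOffsets[hash] cycles through 0..7 via the mask
 * above, so a 9th insertion into a bucket overwrites its oldest entry. */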

/** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
{
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
}
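
/* Note: assuming the rolling hash spreads bits uniformly, the tag == tagMask
 * test passes for roughly 1 position in 2^hashRateLog; e.g. with
 * hashRateLog = 7, about 1 in 128 positions is inserted into the table. */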

/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

/** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
{
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
                                              cur[ldmParams.minMatchLength-1],
                                              state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
}
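
/* The rotation above is a Rabin-Karp style update: conceptually, for a window
 * of m = minMatchLength bytes over a multiplier p,
 *     H' = (H - in[0] * p^(m-1)) * p + in[m]
 * where state->hashPower caches p^(m-1). Each position therefore costs O(1)
 * instead of the O(m) full recompute done by ZSTD_rollingHash_compute(). */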

void ZSTD_ldm_fillHashTable(
            ldmState_t* state, const BYTE* ip,
            const BYTE* iend, ldmParams_t const* params)
{
    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
    if ((size_t)(iend - ip) >= params->minMatchLength) {
        U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
        ZSTD_ldm_fillLdmHashTable(
            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
            params->hashLog - params->bucketSizeLog,
            *params);
    }
}


/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets ms->nextToUpdate to a position closer to anchor if it is far away
 *  (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const current = (U32)(anchor - ms->window.base);
    if (current > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            current - MIN(512, current - ms->nextToUpdate - 1024);
    }
}
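
/* Example: with nextToUpdate == 0 and anchor at index 10000, the gap
 * 10000 - 0 - 1024 = 8976 exceeds 512, so nextToUpdate jumps to
 * 10000 - 512 = 9488; at most 512 bytes are then (re)indexed. */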

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U64 const hashPower = ldmState->hashPower;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
    U32 const hashRateLog = params->hashRateLog;
    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash */
    BYTE const* lastHashed = NULL;
    U64 rollingHash = 0;

    while (ip <= ilimit) {
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
                                                  lastHashed[minMatchLength],
                                                  hashPower);
        } else {
            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
            ip++;
            continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   *params);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                    curForwardMatchLength = ZSTD_count_2segments(
                                                ip, pMatch, iend,
                                                matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowMatchPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowPrefixPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                }

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             *params);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        {
            /* Store the sequence:
             * ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength)
             */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;
            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

            /* Out of sequence storage */
            if (rawSeqStore->size == rawSeqStore->capacity)
                return ERROR(dstSize_tooSmall);
            seq->litLength = (U32)(ip - anchor);
            seq->matchLength = (U32)mLength;
            seq->offset = offset;
            rawSeqStore->size++;
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         *params);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength <= ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, *params);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
    }
    return iend - anchor;
}
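
/* Example: if the tag fires 30 bytes past the anchor and the match extends 80
 * bytes forward and 16 bytes backward, the stored rawSeq is
 * { litLength = 30 - 16 = 14, matchLength = 96, offset = current - matchIndex },
 * with ip rewound by the backward extension before the sequence is recorded. */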

/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}

size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const *const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         * newly generated sequence, or add the `newLeftoverSize` if none are
         * generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}
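
/* Example: srcSize == 2.5 MB with kMaxChunkSize == 1 MB gives
 * nbChunks == 2 + 1 == 3: two full 1 MB chunks plus a final 0.5 MB chunk. */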

void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}

/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}
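
/* Example: for a stored sequence { litLength = 10, matchLength = 100 } with
 * remaining == 50, the returned copy has matchLength = 50 - 10 = 40 (kept if
 * 40 >= minMatch, else offset is zeroed), and ZSTD_ldm_skipSequences() then
 * trims the stored sequence so the next block resumes 50 bytes later. */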

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the lits */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}