1e0c1b49fSNick Terrell /*
2e0c1b49fSNick Terrell * Copyright (c) Yann Collet, Facebook, Inc.
3e0c1b49fSNick Terrell * All rights reserved.
4e0c1b49fSNick Terrell *
5e0c1b49fSNick Terrell * This source code is licensed under both the BSD-style license (found in the
6e0c1b49fSNick Terrell * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7e0c1b49fSNick Terrell * in the COPYING file in the root directory of this source tree).
8e0c1b49fSNick Terrell * You may select, at your option, one of the above-listed licenses.
9e0c1b49fSNick Terrell */
10e0c1b49fSNick Terrell
11e0c1b49fSNick Terrell /*-*************************************
12e0c1b49fSNick Terrell * Dependencies
13e0c1b49fSNick Terrell ***************************************/
14e0c1b49fSNick Terrell #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
15e0c1b49fSNick Terrell #include "../common/mem.h"
16e0c1b49fSNick Terrell #include "hist.h" /* HIST_countFast_wksp */
17e0c1b49fSNick Terrell #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
18e0c1b49fSNick Terrell #include "../common/fse.h"
19e0c1b49fSNick Terrell #define HUF_STATIC_LINKING_ONLY
20e0c1b49fSNick Terrell #include "../common/huf.h"
21e0c1b49fSNick Terrell #include "zstd_compress_internal.h"
22e0c1b49fSNick Terrell #include "zstd_compress_sequences.h"
23e0c1b49fSNick Terrell #include "zstd_compress_literals.h"
24e0c1b49fSNick Terrell #include "zstd_fast.h"
25e0c1b49fSNick Terrell #include "zstd_double_fast.h"
26e0c1b49fSNick Terrell #include "zstd_lazy.h"
27e0c1b49fSNick Terrell #include "zstd_opt.h"
28e0c1b49fSNick Terrell #include "zstd_ldm.h"
29e0c1b49fSNick Terrell #include "zstd_compress_superblock.h"
30e0c1b49fSNick Terrell
31e0c1b49fSNick Terrell /* ***************************************************************
32e0c1b49fSNick Terrell * Tuning parameters
33e0c1b49fSNick Terrell *****************************************************************/
/*!
 * COMPRESS_HEAPMODE :
 * Select how default compression function ZSTD_compress() allocates its context,
 * on stack (0, default), or into heap (1).
 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
 */
40e0c1b49fSNick Terrell
/*!
 * ZSTD_HASHLOG3_MAX :
 * Upper limit, expressed as a log2, on the size of the hash table used to
 * find 3-byte matches (e.g. 17 => 1 << 17 == 128Ki positions).
 * Only zstd_opt uses that table, but allocation is centralized for all
 * strategies, so the limit has to be known here.
 * The size actually selected is stored in ZSTD_matchState_t.hashLog3,
 * so that zstd_opt.c does not need to know about this constant.
 * A value defined before this point takes precedence over the default below.
 */
#if !defined(ZSTD_HASHLOG3_MAX)
#  define ZSTD_HASHLOG3_MAX 17
#endif
53e0c1b49fSNick Terrell
54e0c1b49fSNick Terrell /*-*************************************
55e0c1b49fSNick Terrell * Helper functions
56e0c1b49fSNick Terrell ***************************************/
/* ZSTD_compressBound() :
 * Thin function wrapper around the ZSTD_COMPRESSBOUND() macro.
 * Note that the result is only meaningful for the "normal" full-block
 * strategy: when streaming mode is flushed frequently and produces many
 * small blocks, header overhead can make the compressed data larger than
 * the value returned here.
 * @param srcSize  size of the input, in bytes
 * @return ZSTD_COMPRESSBOUND(srcSize)
 */
size_t ZSTD_compressBound(size_t srcSize)
{
    size_t const bound = ZSTD_COMPRESSBOUND(srcSize);
    return bound;
}
67e0c1b49fSNick Terrell
68e0c1b49fSNick Terrell
69e0c1b49fSNick Terrell /*-*************************************
70e0c1b49fSNick Terrell * Context memory management
71e0c1b49fSNick Terrell ***************************************/
72e0c1b49fSNick Terrell struct ZSTD_CDict_s {
73e0c1b49fSNick Terrell const void* dictContent;
74e0c1b49fSNick Terrell size_t dictContentSize;
75e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
76e0c1b49fSNick Terrell U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
77e0c1b49fSNick Terrell ZSTD_cwksp workspace;
78e0c1b49fSNick Terrell ZSTD_matchState_t matchState;
79e0c1b49fSNick Terrell ZSTD_compressedBlockState_t cBlockState;
80e0c1b49fSNick Terrell ZSTD_customMem customMem;
81e0c1b49fSNick Terrell U32 dictID;
82e0c1b49fSNick Terrell int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
83*2aa14b1aSNick Terrell ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
84*2aa14b1aSNick Terrell * row-based matchfinder. Unless the cdict is reloaded, we will use
85*2aa14b1aSNick Terrell * the same greedy/lazy matchfinder at compression time.
86*2aa14b1aSNick Terrell */
87e0c1b49fSNick Terrell }; /* typedef'd to ZSTD_CDict within "zstd.h" */
88e0c1b49fSNick Terrell
ZSTD_createCCtx(void)89e0c1b49fSNick Terrell ZSTD_CCtx* ZSTD_createCCtx(void)
90e0c1b49fSNick Terrell {
91e0c1b49fSNick Terrell return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
92e0c1b49fSNick Terrell }
93e0c1b49fSNick Terrell
/* ZSTD_initCCtx() :
 * Brings a freshly allocated context to its initial state: every byte is
 * zeroed, the allocator is recorded, BMI2 support is probed, and the
 * parameters are reset.
 * @param cctx        context to initialize; must not be NULL
 * @param memManager  allocator stored in the context
 */
static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    size_t resetStatus;

    assert(cctx != NULL);
    ZSTD_memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuSupportsBmi2();

    /* the reset is only checked by assert() */
    resetStatus = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
    assert(!ZSTD_isError(resetStatus));
    (void)resetStatus;   /* silences "unused" when assert() compiles to nothing */
}
105e0c1b49fSNick Terrell
ZSTD_createCCtx_advanced(ZSTD_customMem customMem)106e0c1b49fSNick Terrell ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
107e0c1b49fSNick Terrell {
108e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(zcss_init==0);
109e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
110e0c1b49fSNick Terrell if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
111e0c1b49fSNick Terrell { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
112e0c1b49fSNick Terrell if (!cctx) return NULL;
113e0c1b49fSNick Terrell ZSTD_initCCtx(cctx, customMem);
114e0c1b49fSNick Terrell return cctx;
115e0c1b49fSNick Terrell }
116e0c1b49fSNick Terrell }
117e0c1b49fSNick Terrell
ZSTD_initStaticCCtx(void * workspace,size_t workspaceSize)118e0c1b49fSNick Terrell ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
119e0c1b49fSNick Terrell {
120e0c1b49fSNick Terrell ZSTD_cwksp ws;
121e0c1b49fSNick Terrell ZSTD_CCtx* cctx;
122e0c1b49fSNick Terrell if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
123e0c1b49fSNick Terrell if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
124e0c1b49fSNick Terrell ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
125e0c1b49fSNick Terrell
126e0c1b49fSNick Terrell cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
127e0c1b49fSNick Terrell if (cctx == NULL) return NULL;
128e0c1b49fSNick Terrell
129e0c1b49fSNick Terrell ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
130e0c1b49fSNick Terrell ZSTD_cwksp_move(&cctx->workspace, &ws);
131e0c1b49fSNick Terrell cctx->staticSize = workspaceSize;
132e0c1b49fSNick Terrell
133e0c1b49fSNick Terrell /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
134e0c1b49fSNick Terrell if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
135e0c1b49fSNick Terrell cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
136e0c1b49fSNick Terrell cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
137e0c1b49fSNick Terrell cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
138e0c1b49fSNick Terrell cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
139e0c1b49fSNick Terrell return cctx;
140e0c1b49fSNick Terrell }
141e0c1b49fSNick Terrell
142e0c1b49fSNick Terrell /*
143e0c1b49fSNick Terrell * Clears and frees all of the dictionaries in the CCtx.
144e0c1b49fSNick Terrell */
ZSTD_clearAllDicts(ZSTD_CCtx * cctx)145e0c1b49fSNick Terrell static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
146e0c1b49fSNick Terrell {
147e0c1b49fSNick Terrell ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
148e0c1b49fSNick Terrell ZSTD_freeCDict(cctx->localDict.cdict);
149e0c1b49fSNick Terrell ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
150e0c1b49fSNick Terrell ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
151e0c1b49fSNick Terrell cctx->cdict = NULL;
152e0c1b49fSNick Terrell }
153e0c1b49fSNick Terrell
/* ZSTD_sizeof_localDict() :
 * @return size of the CDict (as reported by ZSTD_sizeof_CDict()), plus
 *         dict.dictSize when a dictionary buffer is present.
 */
static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t total = 0;
    if (dict.dictBuffer != NULL) {
        total += dict.dictSize;
    }
    total += ZSTD_sizeof_CDict(dict.cdict);
    return total;
}
160e0c1b49fSNick Terrell
/* ZSTD_freeCCtxContent() :
 * Releases what a non-static context holds: its dictionaries, then its
 * workspace. The context object itself is not freed here.
 * @param cctx  context to empty; must be non-NULL with staticSize == 0
 */
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    /* preconditions */
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);

    /* dictionaries first, workspace last */
    ZSTD_clearAllDicts(cctx);
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
168e0c1b49fSNick Terrell
/*! ZSTD_freeCCtx() :
 *  Releases a compression context and everything it holds.
 *  @param cctx  context to free; NULL is accepted and is a no-op
 *  @return 0 on success, or a memory_allocation error code when the context
 *          is a static one (staticSize != 0), which cannot be freed.
 */
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    int cctxInWorkspace;

    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");

    /* must be sampled before the workspace is freed */
    cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
    ZSTD_freeCCtxContent(cctx);
    if (cctxInWorkspace) return 0;   /* no separate free for the object in this case */

    ZSTD_customFree(cctx, cctx->customMem);
    return 0;
}
183e0c1b49fSNick Terrell
184e0c1b49fSNick Terrell
/* ZSTD_sizeof_mtctx() :
 * Multithreading context contribution to ZSTD_sizeof_CCtx().
 * @param cctx  unused
 * @return always 0 in this file
 */
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
    size_t const mtctxSize = 0;
    (void)cctx;
    return mtctxSize;
}
190e0c1b49fSNick Terrell
191e0c1b49fSNick Terrell
ZSTD_sizeof_CCtx(const ZSTD_CCtx * cctx)192e0c1b49fSNick Terrell size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
193e0c1b49fSNick Terrell {
194e0c1b49fSNick Terrell if (cctx==NULL) return 0; /* support sizeof on NULL */
195e0c1b49fSNick Terrell /* cctx may be in the workspace */
196e0c1b49fSNick Terrell return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
197e0c1b49fSNick Terrell + ZSTD_cwksp_sizeof(&cctx->workspace)
198e0c1b49fSNick Terrell + ZSTD_sizeof_localDict(cctx->localDict)
199e0c1b49fSNick Terrell + ZSTD_sizeof_mtctx(cctx);
200e0c1b49fSNick Terrell }
201e0c1b49fSNick Terrell
/*! ZSTD_sizeof_CStream() :
 *  A ZSTD_CStream and a ZSTD_CCtx are the same object, so this simply
 *  forwards to ZSTD_sizeof_CCtx().
 */
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    size_t const streamSize = ZSTD_sizeof_CCtx(zcs);
    return streamSize;
}
206e0c1b49fSNick Terrell
207e0c1b49fSNick Terrell /* private API call, for dictBuilder only */
ZSTD_getSeqStore(const ZSTD_CCtx * ctx)208e0c1b49fSNick Terrell const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
209e0c1b49fSNick Terrell
210*2aa14b1aSNick Terrell /* Returns true if the strategy supports using a row based matchfinder */
ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy)211*2aa14b1aSNick Terrell static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
212*2aa14b1aSNick Terrell return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
213*2aa14b1aSNick Terrell }
214*2aa14b1aSNick Terrell
215*2aa14b1aSNick Terrell /* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
216*2aa14b1aSNick Terrell * for this compression.
217*2aa14b1aSNick Terrell */
ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy,const ZSTD_paramSwitch_e mode)218*2aa14b1aSNick Terrell static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
219*2aa14b1aSNick Terrell assert(mode != ZSTD_ps_auto);
220*2aa14b1aSNick Terrell return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
221*2aa14b1aSNick Terrell }
222*2aa14b1aSNick Terrell
223*2aa14b1aSNick Terrell /* Returns row matchfinder usage given an initial mode and cParams */
ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,const ZSTD_compressionParameters * const cParams)224*2aa14b1aSNick Terrell static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
225*2aa14b1aSNick Terrell const ZSTD_compressionParameters* const cParams) {
226*2aa14b1aSNick Terrell #if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
227*2aa14b1aSNick Terrell int const kHasSIMD128 = 1;
228*2aa14b1aSNick Terrell #else
229*2aa14b1aSNick Terrell int const kHasSIMD128 = 0;
230*2aa14b1aSNick Terrell #endif
231*2aa14b1aSNick Terrell if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
232*2aa14b1aSNick Terrell mode = ZSTD_ps_disable;
233*2aa14b1aSNick Terrell if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
234*2aa14b1aSNick Terrell if (kHasSIMD128) {
235*2aa14b1aSNick Terrell if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
236*2aa14b1aSNick Terrell } else {
237*2aa14b1aSNick Terrell if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
238*2aa14b1aSNick Terrell }
239*2aa14b1aSNick Terrell return mode;
240*2aa14b1aSNick Terrell }
241*2aa14b1aSNick Terrell
242*2aa14b1aSNick Terrell /* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,const ZSTD_compressionParameters * const cParams)243*2aa14b1aSNick Terrell static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
244*2aa14b1aSNick Terrell const ZSTD_compressionParameters* const cParams) {
245*2aa14b1aSNick Terrell if (mode != ZSTD_ps_auto) return mode;
246*2aa14b1aSNick Terrell return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
247*2aa14b1aSNick Terrell }
248*2aa14b1aSNick Terrell
249*2aa14b1aSNick Terrell /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
ZSTD_allocateChainTable(const ZSTD_strategy strategy,const ZSTD_paramSwitch_e useRowMatchFinder,const U32 forDDSDict)250*2aa14b1aSNick Terrell static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
251*2aa14b1aSNick Terrell const ZSTD_paramSwitch_e useRowMatchFinder,
252*2aa14b1aSNick Terrell const U32 forDDSDict) {
253*2aa14b1aSNick Terrell assert(useRowMatchFinder != ZSTD_ps_auto);
254*2aa14b1aSNick Terrell /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
255*2aa14b1aSNick Terrell * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
256*2aa14b1aSNick Terrell */
257*2aa14b1aSNick Terrell return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
258*2aa14b1aSNick Terrell }
259*2aa14b1aSNick Terrell
260e0c1b49fSNick Terrell /* Returns 1 if compression parameters are such that we should
261e0c1b49fSNick Terrell * enable long distance matching (wlog >= 27, strategy >= btopt).
262e0c1b49fSNick Terrell * Returns 0 otherwise.
263e0c1b49fSNick Terrell */
ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,const ZSTD_compressionParameters * const cParams)264*2aa14b1aSNick Terrell static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
265*2aa14b1aSNick Terrell const ZSTD_compressionParameters* const cParams) {
266*2aa14b1aSNick Terrell if (mode != ZSTD_ps_auto) return mode;
267*2aa14b1aSNick Terrell return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
268e0c1b49fSNick Terrell }
269e0c1b49fSNick Terrell
ZSTD_makeCCtxParamsFromCParams(ZSTD_compressionParameters cParams)270e0c1b49fSNick Terrell static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
271e0c1b49fSNick Terrell ZSTD_compressionParameters cParams)
272e0c1b49fSNick Terrell {
273e0c1b49fSNick Terrell ZSTD_CCtx_params cctxParams;
274e0c1b49fSNick Terrell /* should not matter, as all cParams are presumed properly defined */
275e0c1b49fSNick Terrell ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
276e0c1b49fSNick Terrell cctxParams.cParams = cParams;
277e0c1b49fSNick Terrell
278*2aa14b1aSNick Terrell /* Adjust advanced params according to cParams */
279*2aa14b1aSNick Terrell cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
280*2aa14b1aSNick Terrell if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
281e0c1b49fSNick Terrell ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
282e0c1b49fSNick Terrell assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
283e0c1b49fSNick Terrell assert(cctxParams.ldmParams.hashRateLog < 32);
284e0c1b49fSNick Terrell }
285*2aa14b1aSNick Terrell cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
286*2aa14b1aSNick Terrell cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
287e0c1b49fSNick Terrell assert(!ZSTD_checkCParams(cParams));
288e0c1b49fSNick Terrell return cctxParams;
289e0c1b49fSNick Terrell }
290e0c1b49fSNick Terrell
ZSTD_createCCtxParams_advanced(ZSTD_customMem customMem)291e0c1b49fSNick Terrell static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
292e0c1b49fSNick Terrell ZSTD_customMem customMem)
293e0c1b49fSNick Terrell {
294e0c1b49fSNick Terrell ZSTD_CCtx_params* params;
295e0c1b49fSNick Terrell if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
296e0c1b49fSNick Terrell params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
297e0c1b49fSNick Terrell sizeof(ZSTD_CCtx_params), customMem);
298e0c1b49fSNick Terrell if (!params) { return NULL; }
299e0c1b49fSNick Terrell ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
300e0c1b49fSNick Terrell params->customMem = customMem;
301e0c1b49fSNick Terrell return params;
302e0c1b49fSNick Terrell }
303e0c1b49fSNick Terrell
ZSTD_createCCtxParams(void)304e0c1b49fSNick Terrell ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
305e0c1b49fSNick Terrell {
306e0c1b49fSNick Terrell return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
307e0c1b49fSNick Terrell }
308e0c1b49fSNick Terrell
/*! ZSTD_freeCCtxParams() :
 *  Frees a parameter object with the allocator stored inside it.
 *  @param params  object to free; NULL is accepted and is a no-op
 *  @return always 0
 */
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params != NULL) {
        ZSTD_customFree(params, params->customMem);
    }
    return 0;
}
315e0c1b49fSNick Terrell
/*! ZSTD_CCtxParams_reset() :
 *  Re-initializes `params` at ZSTD_CLEVEL_DEFAULT.
 *  @return the result of ZSTD_CCtxParams_init() (0, or an error code for a
 *          NULL pointer).
 */
size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    size_t const initStatus = ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
    return initStatus;
}
320e0c1b49fSNick Terrell
/*! ZSTD_CCtxParams_init() :
 *  Zeroes the whole parameter object, then sets the compression level and
 *  turns the content size flag on.
 *  @param cctxParams        object to initialize
 *  @param compressionLevel  level to record
 *  @return 0, or a GENERIC error code when `cctxParams` is NULL.
 */
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");

    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    /* the only two fields that do not start at zero */
    cctxParams->fParams.contentSizeFlag = 1;
    cctxParams->compressionLevel = compressionLevel;
    return 0;
}
328e0c1b49fSNick Terrell
/* Compression level recorded when parameters are not derived from a compression level. */
#define ZSTD_NO_CLEVEL 0
330e0c1b49fSNick Terrell
331e0c1b49fSNick Terrell /*
332e0c1b49fSNick Terrell * Initializes the cctxParams from params and compressionLevel.
333e0c1b49fSNick Terrell * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
334e0c1b49fSNick Terrell */
ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params * cctxParams,ZSTD_parameters const * params,int compressionLevel)335e0c1b49fSNick Terrell static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
336e0c1b49fSNick Terrell {
337e0c1b49fSNick Terrell assert(!ZSTD_checkCParams(params->cParams));
338e0c1b49fSNick Terrell ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
339e0c1b49fSNick Terrell cctxParams->cParams = params->cParams;
340e0c1b49fSNick Terrell cctxParams->fParams = params->fParams;
341e0c1b49fSNick Terrell /* Should not matter, as all cParams are presumed properly defined.
342e0c1b49fSNick Terrell * But, set it for tracing anyway.
343e0c1b49fSNick Terrell */
344e0c1b49fSNick Terrell cctxParams->compressionLevel = compressionLevel;
345*2aa14b1aSNick Terrell cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams);
346*2aa14b1aSNick Terrell cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, ¶ms->cParams);
347*2aa14b1aSNick Terrell cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, ¶ms->cParams);
348*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
349*2aa14b1aSNick Terrell cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
350e0c1b49fSNick Terrell }
351e0c1b49fSNick Terrell
/*! ZSTD_CCtxParams_init_advanced() :
 *  Validates `params.cParams`, then initializes `cctxParams` from `params`
 *  with ZSTD_NO_CLEVEL as the recorded compression level.
 *  @param cctxParams  object to initialize
 *  @param params      compression and frame parameters
 *  @return 0 on success, a GENERIC error code when `cctxParams` is NULL, or
 *          the error reported by ZSTD_checkCParams().
 */
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
    return 0;
}
359e0c1b49fSNick Terrell
360e0c1b49fSNick Terrell /*
361e0c1b49fSNick Terrell * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
362e0c1b49fSNick Terrell * @param param Validated zstd parameters.
363e0c1b49fSNick Terrell */
ZSTD_CCtxParams_setZstdParams(ZSTD_CCtx_params * cctxParams,const ZSTD_parameters * params)364e0c1b49fSNick Terrell static void ZSTD_CCtxParams_setZstdParams(
365e0c1b49fSNick Terrell ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
366e0c1b49fSNick Terrell {
367e0c1b49fSNick Terrell assert(!ZSTD_checkCParams(params->cParams));
368e0c1b49fSNick Terrell cctxParams->cParams = params->cParams;
369e0c1b49fSNick Terrell cctxParams->fParams = params->fParams;
370e0c1b49fSNick Terrell /* Should not matter, as all cParams are presumed properly defined.
371e0c1b49fSNick Terrell * But, set it for tracing anyway.
372e0c1b49fSNick Terrell */
373e0c1b49fSNick Terrell cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
374e0c1b49fSNick Terrell }
375e0c1b49fSNick Terrell
ZSTD_cParam_getBounds(ZSTD_cParameter param)376e0c1b49fSNick Terrell ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
377e0c1b49fSNick Terrell {
378e0c1b49fSNick Terrell ZSTD_bounds bounds = { 0, 0, 0 };
379e0c1b49fSNick Terrell
380e0c1b49fSNick Terrell switch(param)
381e0c1b49fSNick Terrell {
382e0c1b49fSNick Terrell case ZSTD_c_compressionLevel:
383e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_minCLevel();
384e0c1b49fSNick Terrell bounds.upperBound = ZSTD_maxCLevel();
385e0c1b49fSNick Terrell return bounds;
386e0c1b49fSNick Terrell
387e0c1b49fSNick Terrell case ZSTD_c_windowLog:
388e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
389e0c1b49fSNick Terrell bounds.upperBound = ZSTD_WINDOWLOG_MAX;
390e0c1b49fSNick Terrell return bounds;
391e0c1b49fSNick Terrell
392e0c1b49fSNick Terrell case ZSTD_c_hashLog:
393e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_HASHLOG_MIN;
394e0c1b49fSNick Terrell bounds.upperBound = ZSTD_HASHLOG_MAX;
395e0c1b49fSNick Terrell return bounds;
396e0c1b49fSNick Terrell
397e0c1b49fSNick Terrell case ZSTD_c_chainLog:
398e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_CHAINLOG_MIN;
399e0c1b49fSNick Terrell bounds.upperBound = ZSTD_CHAINLOG_MAX;
400e0c1b49fSNick Terrell return bounds;
401e0c1b49fSNick Terrell
402e0c1b49fSNick Terrell case ZSTD_c_searchLog:
403e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
404e0c1b49fSNick Terrell bounds.upperBound = ZSTD_SEARCHLOG_MAX;
405e0c1b49fSNick Terrell return bounds;
406e0c1b49fSNick Terrell
407e0c1b49fSNick Terrell case ZSTD_c_minMatch:
408e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_MINMATCH_MIN;
409e0c1b49fSNick Terrell bounds.upperBound = ZSTD_MINMATCH_MAX;
410e0c1b49fSNick Terrell return bounds;
411e0c1b49fSNick Terrell
412e0c1b49fSNick Terrell case ZSTD_c_targetLength:
413e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
414e0c1b49fSNick Terrell bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
415e0c1b49fSNick Terrell return bounds;
416e0c1b49fSNick Terrell
417e0c1b49fSNick Terrell case ZSTD_c_strategy:
418e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_STRATEGY_MIN;
419e0c1b49fSNick Terrell bounds.upperBound = ZSTD_STRATEGY_MAX;
420e0c1b49fSNick Terrell return bounds;
421e0c1b49fSNick Terrell
422e0c1b49fSNick Terrell case ZSTD_c_contentSizeFlag:
423e0c1b49fSNick Terrell bounds.lowerBound = 0;
424e0c1b49fSNick Terrell bounds.upperBound = 1;
425e0c1b49fSNick Terrell return bounds;
426e0c1b49fSNick Terrell
427e0c1b49fSNick Terrell case ZSTD_c_checksumFlag:
428e0c1b49fSNick Terrell bounds.lowerBound = 0;
429e0c1b49fSNick Terrell bounds.upperBound = 1;
430e0c1b49fSNick Terrell return bounds;
431e0c1b49fSNick Terrell
432e0c1b49fSNick Terrell case ZSTD_c_dictIDFlag:
433e0c1b49fSNick Terrell bounds.lowerBound = 0;
434e0c1b49fSNick Terrell bounds.upperBound = 1;
435e0c1b49fSNick Terrell return bounds;
436e0c1b49fSNick Terrell
437e0c1b49fSNick Terrell case ZSTD_c_nbWorkers:
438e0c1b49fSNick Terrell bounds.lowerBound = 0;
439e0c1b49fSNick Terrell bounds.upperBound = 0;
440e0c1b49fSNick Terrell return bounds;
441e0c1b49fSNick Terrell
442e0c1b49fSNick Terrell case ZSTD_c_jobSize:
443e0c1b49fSNick Terrell bounds.lowerBound = 0;
444e0c1b49fSNick Terrell bounds.upperBound = 0;
445e0c1b49fSNick Terrell return bounds;
446e0c1b49fSNick Terrell
447e0c1b49fSNick Terrell case ZSTD_c_overlapLog:
448e0c1b49fSNick Terrell bounds.lowerBound = 0;
449e0c1b49fSNick Terrell bounds.upperBound = 0;
450e0c1b49fSNick Terrell return bounds;
451e0c1b49fSNick Terrell
452e0c1b49fSNick Terrell case ZSTD_c_enableDedicatedDictSearch:
453e0c1b49fSNick Terrell bounds.lowerBound = 0;
454e0c1b49fSNick Terrell bounds.upperBound = 1;
455e0c1b49fSNick Terrell return bounds;
456e0c1b49fSNick Terrell
457e0c1b49fSNick Terrell case ZSTD_c_enableLongDistanceMatching:
458e0c1b49fSNick Terrell bounds.lowerBound = 0;
459e0c1b49fSNick Terrell bounds.upperBound = 1;
460e0c1b49fSNick Terrell return bounds;
461e0c1b49fSNick Terrell
462e0c1b49fSNick Terrell case ZSTD_c_ldmHashLog:
463e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
464e0c1b49fSNick Terrell bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
465e0c1b49fSNick Terrell return bounds;
466e0c1b49fSNick Terrell
467e0c1b49fSNick Terrell case ZSTD_c_ldmMinMatch:
468e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
469e0c1b49fSNick Terrell bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
470e0c1b49fSNick Terrell return bounds;
471e0c1b49fSNick Terrell
472e0c1b49fSNick Terrell case ZSTD_c_ldmBucketSizeLog:
473e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
474e0c1b49fSNick Terrell bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
475e0c1b49fSNick Terrell return bounds;
476e0c1b49fSNick Terrell
477e0c1b49fSNick Terrell case ZSTD_c_ldmHashRateLog:
478e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
479e0c1b49fSNick Terrell bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
480e0c1b49fSNick Terrell return bounds;
481e0c1b49fSNick Terrell
482e0c1b49fSNick Terrell /* experimental parameters */
483e0c1b49fSNick Terrell case ZSTD_c_rsyncable:
484e0c1b49fSNick Terrell bounds.lowerBound = 0;
485e0c1b49fSNick Terrell bounds.upperBound = 1;
486e0c1b49fSNick Terrell return bounds;
487e0c1b49fSNick Terrell
488e0c1b49fSNick Terrell case ZSTD_c_forceMaxWindow :
489e0c1b49fSNick Terrell bounds.lowerBound = 0;
490e0c1b49fSNick Terrell bounds.upperBound = 1;
491e0c1b49fSNick Terrell return bounds;
492e0c1b49fSNick Terrell
493e0c1b49fSNick Terrell case ZSTD_c_format:
494e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
495e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_f_zstd1;
496e0c1b49fSNick Terrell bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */
497e0c1b49fSNick Terrell return bounds;
498e0c1b49fSNick Terrell
499e0c1b49fSNick Terrell case ZSTD_c_forceAttachDict:
500e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
501e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_dictDefaultAttach;
502e0c1b49fSNick Terrell bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
503e0c1b49fSNick Terrell return bounds;
504e0c1b49fSNick Terrell
505e0c1b49fSNick Terrell case ZSTD_c_literalCompressionMode:
506*2aa14b1aSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
507*2aa14b1aSNick Terrell bounds.lowerBound = (int)ZSTD_ps_auto;
508*2aa14b1aSNick Terrell bounds.upperBound = (int)ZSTD_ps_disable;
509e0c1b49fSNick Terrell return bounds;
510e0c1b49fSNick Terrell
511e0c1b49fSNick Terrell case ZSTD_c_targetCBlockSize:
512e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
513e0c1b49fSNick Terrell bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
514e0c1b49fSNick Terrell return bounds;
515e0c1b49fSNick Terrell
516e0c1b49fSNick Terrell case ZSTD_c_srcSizeHint:
517e0c1b49fSNick Terrell bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
518e0c1b49fSNick Terrell bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
519e0c1b49fSNick Terrell return bounds;
520e0c1b49fSNick Terrell
521e0c1b49fSNick Terrell case ZSTD_c_stableInBuffer:
522e0c1b49fSNick Terrell case ZSTD_c_stableOutBuffer:
523e0c1b49fSNick Terrell bounds.lowerBound = (int)ZSTD_bm_buffered;
524e0c1b49fSNick Terrell bounds.upperBound = (int)ZSTD_bm_stable;
525e0c1b49fSNick Terrell return bounds;
526e0c1b49fSNick Terrell
527e0c1b49fSNick Terrell case ZSTD_c_blockDelimiters:
528e0c1b49fSNick Terrell bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
529e0c1b49fSNick Terrell bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
530e0c1b49fSNick Terrell return bounds;
531e0c1b49fSNick Terrell
532e0c1b49fSNick Terrell case ZSTD_c_validateSequences:
533e0c1b49fSNick Terrell bounds.lowerBound = 0;
534e0c1b49fSNick Terrell bounds.upperBound = 1;
535e0c1b49fSNick Terrell return bounds;
536e0c1b49fSNick Terrell
537*2aa14b1aSNick Terrell case ZSTD_c_useBlockSplitter:
538*2aa14b1aSNick Terrell bounds.lowerBound = (int)ZSTD_ps_auto;
539*2aa14b1aSNick Terrell bounds.upperBound = (int)ZSTD_ps_disable;
540*2aa14b1aSNick Terrell return bounds;
541*2aa14b1aSNick Terrell
542*2aa14b1aSNick Terrell case ZSTD_c_useRowMatchFinder:
543*2aa14b1aSNick Terrell bounds.lowerBound = (int)ZSTD_ps_auto;
544*2aa14b1aSNick Terrell bounds.upperBound = (int)ZSTD_ps_disable;
545*2aa14b1aSNick Terrell return bounds;
546*2aa14b1aSNick Terrell
547*2aa14b1aSNick Terrell case ZSTD_c_deterministicRefPrefix:
548*2aa14b1aSNick Terrell bounds.lowerBound = 0;
549*2aa14b1aSNick Terrell bounds.upperBound = 1;
550*2aa14b1aSNick Terrell return bounds;
551*2aa14b1aSNick Terrell
552e0c1b49fSNick Terrell default:
553e0c1b49fSNick Terrell bounds.error = ERROR(parameter_unsupported);
554e0c1b49fSNick Terrell return bounds;
555e0c1b49fSNick Terrell }
556e0c1b49fSNick Terrell }
557e0c1b49fSNick Terrell
558e0c1b49fSNick Terrell /* ZSTD_cParam_clampBounds:
559e0c1b49fSNick Terrell * Clamps the value into the bounded range.
560e0c1b49fSNick Terrell */
ZSTD_cParam_clampBounds(ZSTD_cParameter cParam,int * value)561e0c1b49fSNick Terrell static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
562e0c1b49fSNick Terrell {
563e0c1b49fSNick Terrell ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
564e0c1b49fSNick Terrell if (ZSTD_isError(bounds.error)) return bounds.error;
565e0c1b49fSNick Terrell if (*value < bounds.lowerBound) *value = bounds.lowerBound;
566e0c1b49fSNick Terrell if (*value > bounds.upperBound) *value = bounds.upperBound;
567e0c1b49fSNick Terrell return 0;
568e0c1b49fSNick Terrell }
569e0c1b49fSNick Terrell
/* BOUNDCHECK() :
 * Makes the enclosing function return a `parameter_outOfBound` error
 * when ZSTD_cParam_withinBounds() rejects `val` for `cParam`.
 * The body is wrapped in do { } while (0) so that the expansion is one
 * single statement : it must be followed by `;`
 * and stays correct inside an unbraced if / else.
 */
#define BOUNDCHECK(cParam, val)                                       \
    do {                                                              \
        RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),        \
                        parameter_outOfBound, "Param out of bounds"); \
    } while (0)
574e0c1b49fSNick Terrell
575e0c1b49fSNick Terrell
/* ZSTD_isUpdateAuthorized() :
 * Tells whether `param` may still be modified once the context has left
 * its init stage (see the check in ZSTD_CCtx_setParameter()).
 * @return : 1 when the update is allowed, 0 otherwise
 *           (unknown parameters are refused too).
 */
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    int authorized = 0;

    switch(param)
    {
    /* parameters that can be updated after the init stage */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        authorized = 1;
        break;

    /* everything else is frozen once the init stage is over */
    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
    default:
        break;
    }

    return authorized;
}
620e0c1b49fSNick Terrell
ZSTD_CCtx_setParameter(ZSTD_CCtx * cctx,ZSTD_cParameter param,int value)621e0c1b49fSNick Terrell size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
622e0c1b49fSNick Terrell {
623e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
624e0c1b49fSNick Terrell if (cctx->streamStage != zcss_init) {
625e0c1b49fSNick Terrell if (ZSTD_isUpdateAuthorized(param)) {
626e0c1b49fSNick Terrell cctx->cParamsChanged = 1;
627e0c1b49fSNick Terrell } else {
628e0c1b49fSNick Terrell RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
629e0c1b49fSNick Terrell } }
630e0c1b49fSNick Terrell
631e0c1b49fSNick Terrell switch(param)
632e0c1b49fSNick Terrell {
633e0c1b49fSNick Terrell case ZSTD_c_nbWorkers:
634e0c1b49fSNick Terrell RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
635e0c1b49fSNick Terrell "MT not compatible with static alloc");
636e0c1b49fSNick Terrell break;
637e0c1b49fSNick Terrell
638e0c1b49fSNick Terrell case ZSTD_c_compressionLevel:
639e0c1b49fSNick Terrell case ZSTD_c_windowLog:
640e0c1b49fSNick Terrell case ZSTD_c_hashLog:
641e0c1b49fSNick Terrell case ZSTD_c_chainLog:
642e0c1b49fSNick Terrell case ZSTD_c_searchLog:
643e0c1b49fSNick Terrell case ZSTD_c_minMatch:
644e0c1b49fSNick Terrell case ZSTD_c_targetLength:
645e0c1b49fSNick Terrell case ZSTD_c_strategy:
646e0c1b49fSNick Terrell case ZSTD_c_ldmHashRateLog:
647e0c1b49fSNick Terrell case ZSTD_c_format:
648e0c1b49fSNick Terrell case ZSTD_c_contentSizeFlag:
649e0c1b49fSNick Terrell case ZSTD_c_checksumFlag:
650e0c1b49fSNick Terrell case ZSTD_c_dictIDFlag:
651e0c1b49fSNick Terrell case ZSTD_c_forceMaxWindow:
652e0c1b49fSNick Terrell case ZSTD_c_forceAttachDict:
653e0c1b49fSNick Terrell case ZSTD_c_literalCompressionMode:
654e0c1b49fSNick Terrell case ZSTD_c_jobSize:
655e0c1b49fSNick Terrell case ZSTD_c_overlapLog:
656e0c1b49fSNick Terrell case ZSTD_c_rsyncable:
657e0c1b49fSNick Terrell case ZSTD_c_enableDedicatedDictSearch:
658e0c1b49fSNick Terrell case ZSTD_c_enableLongDistanceMatching:
659e0c1b49fSNick Terrell case ZSTD_c_ldmHashLog:
660e0c1b49fSNick Terrell case ZSTD_c_ldmMinMatch:
661e0c1b49fSNick Terrell case ZSTD_c_ldmBucketSizeLog:
662e0c1b49fSNick Terrell case ZSTD_c_targetCBlockSize:
663e0c1b49fSNick Terrell case ZSTD_c_srcSizeHint:
664e0c1b49fSNick Terrell case ZSTD_c_stableInBuffer:
665e0c1b49fSNick Terrell case ZSTD_c_stableOutBuffer:
666e0c1b49fSNick Terrell case ZSTD_c_blockDelimiters:
667e0c1b49fSNick Terrell case ZSTD_c_validateSequences:
668*2aa14b1aSNick Terrell case ZSTD_c_useBlockSplitter:
669*2aa14b1aSNick Terrell case ZSTD_c_useRowMatchFinder:
670*2aa14b1aSNick Terrell case ZSTD_c_deterministicRefPrefix:
671e0c1b49fSNick Terrell break;
672e0c1b49fSNick Terrell
673e0c1b49fSNick Terrell default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
674e0c1b49fSNick Terrell }
675e0c1b49fSNick Terrell return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
676e0c1b49fSNick Terrell }
677e0c1b49fSNick Terrell
ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params * CCtxParams,ZSTD_cParameter param,int value)678e0c1b49fSNick Terrell size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
679e0c1b49fSNick Terrell ZSTD_cParameter param, int value)
680e0c1b49fSNick Terrell {
681e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
682e0c1b49fSNick Terrell switch(param)
683e0c1b49fSNick Terrell {
684e0c1b49fSNick Terrell case ZSTD_c_format :
685e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_format, value);
686e0c1b49fSNick Terrell CCtxParams->format = (ZSTD_format_e)value;
687e0c1b49fSNick Terrell return (size_t)CCtxParams->format;
688e0c1b49fSNick Terrell
689e0c1b49fSNick Terrell case ZSTD_c_compressionLevel : {
690e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
691e0c1b49fSNick Terrell if (value == 0)
692e0c1b49fSNick Terrell CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
693e0c1b49fSNick Terrell else
694e0c1b49fSNick Terrell CCtxParams->compressionLevel = value;
695e0c1b49fSNick Terrell if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
696e0c1b49fSNick Terrell return 0; /* return type (size_t) cannot represent negative values */
697e0c1b49fSNick Terrell }
698e0c1b49fSNick Terrell
699e0c1b49fSNick Terrell case ZSTD_c_windowLog :
700e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
701e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_windowLog, value);
702e0c1b49fSNick Terrell CCtxParams->cParams.windowLog = (U32)value;
703e0c1b49fSNick Terrell return CCtxParams->cParams.windowLog;
704e0c1b49fSNick Terrell
705e0c1b49fSNick Terrell case ZSTD_c_hashLog :
706e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
707e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_hashLog, value);
708e0c1b49fSNick Terrell CCtxParams->cParams.hashLog = (U32)value;
709e0c1b49fSNick Terrell return CCtxParams->cParams.hashLog;
710e0c1b49fSNick Terrell
711e0c1b49fSNick Terrell case ZSTD_c_chainLog :
712e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
713e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_chainLog, value);
714e0c1b49fSNick Terrell CCtxParams->cParams.chainLog = (U32)value;
715e0c1b49fSNick Terrell return CCtxParams->cParams.chainLog;
716e0c1b49fSNick Terrell
717e0c1b49fSNick Terrell case ZSTD_c_searchLog :
718e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
719e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_searchLog, value);
720e0c1b49fSNick Terrell CCtxParams->cParams.searchLog = (U32)value;
721e0c1b49fSNick Terrell return (size_t)value;
722e0c1b49fSNick Terrell
723e0c1b49fSNick Terrell case ZSTD_c_minMatch :
724e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
725e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_minMatch, value);
726e0c1b49fSNick Terrell CCtxParams->cParams.minMatch = value;
727e0c1b49fSNick Terrell return CCtxParams->cParams.minMatch;
728e0c1b49fSNick Terrell
729e0c1b49fSNick Terrell case ZSTD_c_targetLength :
730e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_targetLength, value);
731e0c1b49fSNick Terrell CCtxParams->cParams.targetLength = value;
732e0c1b49fSNick Terrell return CCtxParams->cParams.targetLength;
733e0c1b49fSNick Terrell
734e0c1b49fSNick Terrell case ZSTD_c_strategy :
735e0c1b49fSNick Terrell if (value!=0) /* 0 => use default */
736e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_strategy, value);
737e0c1b49fSNick Terrell CCtxParams->cParams.strategy = (ZSTD_strategy)value;
738e0c1b49fSNick Terrell return (size_t)CCtxParams->cParams.strategy;
739e0c1b49fSNick Terrell
740e0c1b49fSNick Terrell case ZSTD_c_contentSizeFlag :
741e0c1b49fSNick Terrell /* Content size written in frame header _when known_ (default:1) */
742e0c1b49fSNick Terrell DEBUGLOG(4, "set content size flag = %u", (value!=0));
743e0c1b49fSNick Terrell CCtxParams->fParams.contentSizeFlag = value != 0;
744e0c1b49fSNick Terrell return CCtxParams->fParams.contentSizeFlag;
745e0c1b49fSNick Terrell
746e0c1b49fSNick Terrell case ZSTD_c_checksumFlag :
747e0c1b49fSNick Terrell /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
748e0c1b49fSNick Terrell CCtxParams->fParams.checksumFlag = value != 0;
749e0c1b49fSNick Terrell return CCtxParams->fParams.checksumFlag;
750e0c1b49fSNick Terrell
751e0c1b49fSNick Terrell case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
752e0c1b49fSNick Terrell DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
753e0c1b49fSNick Terrell CCtxParams->fParams.noDictIDFlag = !value;
754e0c1b49fSNick Terrell return !CCtxParams->fParams.noDictIDFlag;
755e0c1b49fSNick Terrell
756e0c1b49fSNick Terrell case ZSTD_c_forceMaxWindow :
757e0c1b49fSNick Terrell CCtxParams->forceWindow = (value != 0);
758e0c1b49fSNick Terrell return CCtxParams->forceWindow;
759e0c1b49fSNick Terrell
760e0c1b49fSNick Terrell case ZSTD_c_forceAttachDict : {
761e0c1b49fSNick Terrell const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
762e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
763e0c1b49fSNick Terrell CCtxParams->attachDictPref = pref;
764e0c1b49fSNick Terrell return CCtxParams->attachDictPref;
765e0c1b49fSNick Terrell }
766e0c1b49fSNick Terrell
767e0c1b49fSNick Terrell case ZSTD_c_literalCompressionMode : {
768*2aa14b1aSNick Terrell const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
769e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
770e0c1b49fSNick Terrell CCtxParams->literalCompressionMode = lcm;
771e0c1b49fSNick Terrell return CCtxParams->literalCompressionMode;
772e0c1b49fSNick Terrell }
773e0c1b49fSNick Terrell
774e0c1b49fSNick Terrell case ZSTD_c_nbWorkers :
775e0c1b49fSNick Terrell RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
776e0c1b49fSNick Terrell return 0;
777e0c1b49fSNick Terrell
778e0c1b49fSNick Terrell case ZSTD_c_jobSize :
779e0c1b49fSNick Terrell RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
780e0c1b49fSNick Terrell return 0;
781e0c1b49fSNick Terrell
782e0c1b49fSNick Terrell case ZSTD_c_overlapLog :
783e0c1b49fSNick Terrell RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
784e0c1b49fSNick Terrell return 0;
785e0c1b49fSNick Terrell
786e0c1b49fSNick Terrell case ZSTD_c_rsyncable :
787e0c1b49fSNick Terrell RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
788e0c1b49fSNick Terrell return 0;
789e0c1b49fSNick Terrell
790e0c1b49fSNick Terrell case ZSTD_c_enableDedicatedDictSearch :
791e0c1b49fSNick Terrell CCtxParams->enableDedicatedDictSearch = (value!=0);
792e0c1b49fSNick Terrell return CCtxParams->enableDedicatedDictSearch;
793e0c1b49fSNick Terrell
794e0c1b49fSNick Terrell case ZSTD_c_enableLongDistanceMatching :
795*2aa14b1aSNick Terrell CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
796e0c1b49fSNick Terrell return CCtxParams->ldmParams.enableLdm;
797e0c1b49fSNick Terrell
798e0c1b49fSNick Terrell case ZSTD_c_ldmHashLog :
799e0c1b49fSNick Terrell if (value!=0) /* 0 ==> auto */
800e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_ldmHashLog, value);
801e0c1b49fSNick Terrell CCtxParams->ldmParams.hashLog = value;
802e0c1b49fSNick Terrell return CCtxParams->ldmParams.hashLog;
803e0c1b49fSNick Terrell
804e0c1b49fSNick Terrell case ZSTD_c_ldmMinMatch :
805e0c1b49fSNick Terrell if (value!=0) /* 0 ==> default */
806e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
807e0c1b49fSNick Terrell CCtxParams->ldmParams.minMatchLength = value;
808e0c1b49fSNick Terrell return CCtxParams->ldmParams.minMatchLength;
809e0c1b49fSNick Terrell
810e0c1b49fSNick Terrell case ZSTD_c_ldmBucketSizeLog :
811e0c1b49fSNick Terrell if (value!=0) /* 0 ==> default */
812e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
813e0c1b49fSNick Terrell CCtxParams->ldmParams.bucketSizeLog = value;
814e0c1b49fSNick Terrell return CCtxParams->ldmParams.bucketSizeLog;
815e0c1b49fSNick Terrell
816e0c1b49fSNick Terrell case ZSTD_c_ldmHashRateLog :
817e0c1b49fSNick Terrell if (value!=0) /* 0 ==> default */
818e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
819e0c1b49fSNick Terrell CCtxParams->ldmParams.hashRateLog = value;
820e0c1b49fSNick Terrell return CCtxParams->ldmParams.hashRateLog;
821e0c1b49fSNick Terrell
822e0c1b49fSNick Terrell case ZSTD_c_targetCBlockSize :
823e0c1b49fSNick Terrell if (value!=0) /* 0 ==> default */
824e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
825e0c1b49fSNick Terrell CCtxParams->targetCBlockSize = value;
826e0c1b49fSNick Terrell return CCtxParams->targetCBlockSize;
827e0c1b49fSNick Terrell
828e0c1b49fSNick Terrell case ZSTD_c_srcSizeHint :
829e0c1b49fSNick Terrell if (value!=0) /* 0 ==> default */
830e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_srcSizeHint, value);
831e0c1b49fSNick Terrell CCtxParams->srcSizeHint = value;
832e0c1b49fSNick Terrell return CCtxParams->srcSizeHint;
833e0c1b49fSNick Terrell
834e0c1b49fSNick Terrell case ZSTD_c_stableInBuffer:
835e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_stableInBuffer, value);
836e0c1b49fSNick Terrell CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
837e0c1b49fSNick Terrell return CCtxParams->inBufferMode;
838e0c1b49fSNick Terrell
839e0c1b49fSNick Terrell case ZSTD_c_stableOutBuffer:
840e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
841e0c1b49fSNick Terrell CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
842e0c1b49fSNick Terrell return CCtxParams->outBufferMode;
843e0c1b49fSNick Terrell
844e0c1b49fSNick Terrell case ZSTD_c_blockDelimiters:
845e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_blockDelimiters, value);
846e0c1b49fSNick Terrell CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
847e0c1b49fSNick Terrell return CCtxParams->blockDelimiters;
848e0c1b49fSNick Terrell
849e0c1b49fSNick Terrell case ZSTD_c_validateSequences:
850e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_validateSequences, value);
851e0c1b49fSNick Terrell CCtxParams->validateSequences = value;
852e0c1b49fSNick Terrell return CCtxParams->validateSequences;
853e0c1b49fSNick Terrell
854*2aa14b1aSNick Terrell case ZSTD_c_useBlockSplitter:
855*2aa14b1aSNick Terrell BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
856*2aa14b1aSNick Terrell CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
857*2aa14b1aSNick Terrell return CCtxParams->useBlockSplitter;
858*2aa14b1aSNick Terrell
859*2aa14b1aSNick Terrell case ZSTD_c_useRowMatchFinder:
860*2aa14b1aSNick Terrell BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
861*2aa14b1aSNick Terrell CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
862*2aa14b1aSNick Terrell return CCtxParams->useRowMatchFinder;
863*2aa14b1aSNick Terrell
864*2aa14b1aSNick Terrell case ZSTD_c_deterministicRefPrefix:
865*2aa14b1aSNick Terrell BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
866*2aa14b1aSNick Terrell CCtxParams->deterministicRefPrefix = !!value;
867*2aa14b1aSNick Terrell return CCtxParams->deterministicRefPrefix;
868*2aa14b1aSNick Terrell
869e0c1b49fSNick Terrell default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
870e0c1b49fSNick Terrell }
871e0c1b49fSNick Terrell }
872e0c1b49fSNick Terrell
ZSTD_CCtx_getParameter(ZSTD_CCtx const * cctx,ZSTD_cParameter param,int * value)873e0c1b49fSNick Terrell size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
874e0c1b49fSNick Terrell {
875e0c1b49fSNick Terrell return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
876e0c1b49fSNick Terrell }
877e0c1b49fSNick Terrell
ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params const * CCtxParams,ZSTD_cParameter param,int * value)878e0c1b49fSNick Terrell size_t ZSTD_CCtxParams_getParameter(
879e0c1b49fSNick Terrell ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
880e0c1b49fSNick Terrell {
881e0c1b49fSNick Terrell switch(param)
882e0c1b49fSNick Terrell {
883e0c1b49fSNick Terrell case ZSTD_c_format :
884e0c1b49fSNick Terrell *value = CCtxParams->format;
885e0c1b49fSNick Terrell break;
886e0c1b49fSNick Terrell case ZSTD_c_compressionLevel :
887e0c1b49fSNick Terrell *value = CCtxParams->compressionLevel;
888e0c1b49fSNick Terrell break;
889e0c1b49fSNick Terrell case ZSTD_c_windowLog :
890e0c1b49fSNick Terrell *value = (int)CCtxParams->cParams.windowLog;
891e0c1b49fSNick Terrell break;
892e0c1b49fSNick Terrell case ZSTD_c_hashLog :
893e0c1b49fSNick Terrell *value = (int)CCtxParams->cParams.hashLog;
894e0c1b49fSNick Terrell break;
895e0c1b49fSNick Terrell case ZSTD_c_chainLog :
896e0c1b49fSNick Terrell *value = (int)CCtxParams->cParams.chainLog;
897e0c1b49fSNick Terrell break;
898e0c1b49fSNick Terrell case ZSTD_c_searchLog :
899e0c1b49fSNick Terrell *value = CCtxParams->cParams.searchLog;
900e0c1b49fSNick Terrell break;
901e0c1b49fSNick Terrell case ZSTD_c_minMatch :
902e0c1b49fSNick Terrell *value = CCtxParams->cParams.minMatch;
903e0c1b49fSNick Terrell break;
904e0c1b49fSNick Terrell case ZSTD_c_targetLength :
905e0c1b49fSNick Terrell *value = CCtxParams->cParams.targetLength;
906e0c1b49fSNick Terrell break;
907e0c1b49fSNick Terrell case ZSTD_c_strategy :
908e0c1b49fSNick Terrell *value = (unsigned)CCtxParams->cParams.strategy;
909e0c1b49fSNick Terrell break;
910e0c1b49fSNick Terrell case ZSTD_c_contentSizeFlag :
911e0c1b49fSNick Terrell *value = CCtxParams->fParams.contentSizeFlag;
912e0c1b49fSNick Terrell break;
913e0c1b49fSNick Terrell case ZSTD_c_checksumFlag :
914e0c1b49fSNick Terrell *value = CCtxParams->fParams.checksumFlag;
915e0c1b49fSNick Terrell break;
916e0c1b49fSNick Terrell case ZSTD_c_dictIDFlag :
917e0c1b49fSNick Terrell *value = !CCtxParams->fParams.noDictIDFlag;
918e0c1b49fSNick Terrell break;
919e0c1b49fSNick Terrell case ZSTD_c_forceMaxWindow :
920e0c1b49fSNick Terrell *value = CCtxParams->forceWindow;
921e0c1b49fSNick Terrell break;
922e0c1b49fSNick Terrell case ZSTD_c_forceAttachDict :
923e0c1b49fSNick Terrell *value = CCtxParams->attachDictPref;
924e0c1b49fSNick Terrell break;
925e0c1b49fSNick Terrell case ZSTD_c_literalCompressionMode :
926e0c1b49fSNick Terrell *value = CCtxParams->literalCompressionMode;
927e0c1b49fSNick Terrell break;
928e0c1b49fSNick Terrell case ZSTD_c_nbWorkers :
929e0c1b49fSNick Terrell assert(CCtxParams->nbWorkers == 0);
930e0c1b49fSNick Terrell *value = CCtxParams->nbWorkers;
931e0c1b49fSNick Terrell break;
932e0c1b49fSNick Terrell case ZSTD_c_jobSize :
933e0c1b49fSNick Terrell RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
934e0c1b49fSNick Terrell case ZSTD_c_overlapLog :
935e0c1b49fSNick Terrell RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
936e0c1b49fSNick Terrell case ZSTD_c_rsyncable :
937e0c1b49fSNick Terrell RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
938e0c1b49fSNick Terrell case ZSTD_c_enableDedicatedDictSearch :
939e0c1b49fSNick Terrell *value = CCtxParams->enableDedicatedDictSearch;
940e0c1b49fSNick Terrell break;
941e0c1b49fSNick Terrell case ZSTD_c_enableLongDistanceMatching :
942e0c1b49fSNick Terrell *value = CCtxParams->ldmParams.enableLdm;
943e0c1b49fSNick Terrell break;
944e0c1b49fSNick Terrell case ZSTD_c_ldmHashLog :
945e0c1b49fSNick Terrell *value = CCtxParams->ldmParams.hashLog;
946e0c1b49fSNick Terrell break;
947e0c1b49fSNick Terrell case ZSTD_c_ldmMinMatch :
948e0c1b49fSNick Terrell *value = CCtxParams->ldmParams.minMatchLength;
949e0c1b49fSNick Terrell break;
950e0c1b49fSNick Terrell case ZSTD_c_ldmBucketSizeLog :
951e0c1b49fSNick Terrell *value = CCtxParams->ldmParams.bucketSizeLog;
952e0c1b49fSNick Terrell break;
953e0c1b49fSNick Terrell case ZSTD_c_ldmHashRateLog :
954e0c1b49fSNick Terrell *value = CCtxParams->ldmParams.hashRateLog;
955e0c1b49fSNick Terrell break;
956e0c1b49fSNick Terrell case ZSTD_c_targetCBlockSize :
957e0c1b49fSNick Terrell *value = (int)CCtxParams->targetCBlockSize;
958e0c1b49fSNick Terrell break;
959e0c1b49fSNick Terrell case ZSTD_c_srcSizeHint :
960e0c1b49fSNick Terrell *value = (int)CCtxParams->srcSizeHint;
961e0c1b49fSNick Terrell break;
962e0c1b49fSNick Terrell case ZSTD_c_stableInBuffer :
963e0c1b49fSNick Terrell *value = (int)CCtxParams->inBufferMode;
964e0c1b49fSNick Terrell break;
965e0c1b49fSNick Terrell case ZSTD_c_stableOutBuffer :
966e0c1b49fSNick Terrell *value = (int)CCtxParams->outBufferMode;
967e0c1b49fSNick Terrell break;
968e0c1b49fSNick Terrell case ZSTD_c_blockDelimiters :
969e0c1b49fSNick Terrell *value = (int)CCtxParams->blockDelimiters;
970e0c1b49fSNick Terrell break;
971e0c1b49fSNick Terrell case ZSTD_c_validateSequences :
972e0c1b49fSNick Terrell *value = (int)CCtxParams->validateSequences;
973e0c1b49fSNick Terrell break;
974*2aa14b1aSNick Terrell case ZSTD_c_useBlockSplitter :
975*2aa14b1aSNick Terrell *value = (int)CCtxParams->useBlockSplitter;
976*2aa14b1aSNick Terrell break;
977*2aa14b1aSNick Terrell case ZSTD_c_useRowMatchFinder :
978*2aa14b1aSNick Terrell *value = (int)CCtxParams->useRowMatchFinder;
979*2aa14b1aSNick Terrell break;
980*2aa14b1aSNick Terrell case ZSTD_c_deterministicRefPrefix:
981*2aa14b1aSNick Terrell *value = (int)CCtxParams->deterministicRefPrefix;
982*2aa14b1aSNick Terrell break;
983e0c1b49fSNick Terrell default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
984e0c1b49fSNick Terrell }
985e0c1b49fSNick Terrell return 0;
986e0c1b49fSNick Terrell }
987e0c1b49fSNick Terrell
988e0c1b49fSNick Terrell /* ZSTD_CCtx_setParametersUsingCCtxParams() :
989e0c1b49fSNick Terrell * just applies `params` into `cctx`
990e0c1b49fSNick Terrell * no action is performed, parameters are merely stored.
991e0c1b49fSNick Terrell * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
992e0c1b49fSNick Terrell * This is possible even if a compression is ongoing.
993e0c1b49fSNick Terrell * In which case, new parameters will be applied on the fly, starting with next compression job.
994e0c1b49fSNick Terrell */
ZSTD_CCtx_setParametersUsingCCtxParams(ZSTD_CCtx * cctx,const ZSTD_CCtx_params * params)995e0c1b49fSNick Terrell size_t ZSTD_CCtx_setParametersUsingCCtxParams(
996e0c1b49fSNick Terrell ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
997e0c1b49fSNick Terrell {
998e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
999e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1000e0c1b49fSNick Terrell "The context is in the wrong stage!");
1001e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->cdict, stage_wrong,
1002e0c1b49fSNick Terrell "Can't override parameters with cdict attached (some must "
1003e0c1b49fSNick Terrell "be inherited from the cdict).");
1004e0c1b49fSNick Terrell
1005e0c1b49fSNick Terrell cctx->requestedParams = *params;
1006e0c1b49fSNick Terrell return 0;
1007e0c1b49fSNick Terrell }
1008e0c1b49fSNick Terrell
ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx * cctx,unsigned long long pledgedSrcSize)1009*2aa14b1aSNick Terrell size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
1010e0c1b49fSNick Terrell {
1011e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
1012e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1013e0c1b49fSNick Terrell "Can't set pledgedSrcSize when not in init stage.");
1014e0c1b49fSNick Terrell cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1015e0c1b49fSNick Terrell return 0;
1016e0c1b49fSNick Terrell }
1017e0c1b49fSNick Terrell
1018e0c1b49fSNick Terrell static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
1019e0c1b49fSNick Terrell int const compressionLevel,
1020e0c1b49fSNick Terrell size_t const dictSize);
1021e0c1b49fSNick Terrell static int ZSTD_dedicatedDictSearch_isSupported(
1022e0c1b49fSNick Terrell const ZSTD_compressionParameters* cParams);
1023e0c1b49fSNick Terrell static void ZSTD_dedicatedDictSearch_revertCParams(
1024e0c1b49fSNick Terrell ZSTD_compressionParameters* cParams);
1025e0c1b49fSNick Terrell
1026e0c1b49fSNick Terrell /*
1027e0c1b49fSNick Terrell * Initializes the local dict using the requested parameters.
1028e0c1b49fSNick Terrell * NOTE: This does not use the pledged src size, because it may be used for more
1029e0c1b49fSNick Terrell * than one compression.
1030e0c1b49fSNick Terrell */
ZSTD_initLocalDict(ZSTD_CCtx * cctx)1031e0c1b49fSNick Terrell static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
1032e0c1b49fSNick Terrell {
1033e0c1b49fSNick Terrell ZSTD_localDict* const dl = &cctx->localDict;
1034e0c1b49fSNick Terrell if (dl->dict == NULL) {
1035e0c1b49fSNick Terrell /* No local dictionary. */
1036e0c1b49fSNick Terrell assert(dl->dictBuffer == NULL);
1037e0c1b49fSNick Terrell assert(dl->cdict == NULL);
1038e0c1b49fSNick Terrell assert(dl->dictSize == 0);
1039e0c1b49fSNick Terrell return 0;
1040e0c1b49fSNick Terrell }
1041e0c1b49fSNick Terrell if (dl->cdict != NULL) {
1042e0c1b49fSNick Terrell assert(cctx->cdict == dl->cdict);
1043e0c1b49fSNick Terrell /* Local dictionary already initialized. */
1044e0c1b49fSNick Terrell return 0;
1045e0c1b49fSNick Terrell }
1046e0c1b49fSNick Terrell assert(dl->dictSize > 0);
1047e0c1b49fSNick Terrell assert(cctx->cdict == NULL);
1048e0c1b49fSNick Terrell assert(cctx->prefixDict.dict == NULL);
1049e0c1b49fSNick Terrell
1050e0c1b49fSNick Terrell dl->cdict = ZSTD_createCDict_advanced2(
1051e0c1b49fSNick Terrell dl->dict,
1052e0c1b49fSNick Terrell dl->dictSize,
1053e0c1b49fSNick Terrell ZSTD_dlm_byRef,
1054e0c1b49fSNick Terrell dl->dictContentType,
1055e0c1b49fSNick Terrell &cctx->requestedParams,
1056e0c1b49fSNick Terrell cctx->customMem);
1057e0c1b49fSNick Terrell RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
1058e0c1b49fSNick Terrell cctx->cdict = dl->cdict;
1059e0c1b49fSNick Terrell return 0;
1060e0c1b49fSNick Terrell }
1061e0c1b49fSNick Terrell
/* ZSTD_CCtx_loadDictionary_advanced() :
 * Registers `dict` as the local dictionary of `cctx`,
 * after clearing every dictionary previously set on the context.
 * A NULL `dict` or a zero `dictSize` selects "no dictionary" mode.
 * With ZSTD_dlm_byRef, the caller's buffer is referenced;
 * otherwise its content is copied into a buffer obtained from the
 * context's custom allocator (refused for a static context).
 * @return : 0, or an error code (`stage_wrong`, `memory_allocation`).
 */
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't load a dictionary when ctx is not in init stage.");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);

    ZSTD_clearAllDicts(cctx);   /* in case one already exists */

    if (dict == NULL || dictSize == 0) {
        /* no dictionary mode */
        return 0;
    }

    if (dictLoadMethod != ZSTD_dlm_byRef) {
        /* private copy of the dictionary content */
        void* dictBuffer;
        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                        "no malloc for static CCtx");
        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
        ZSTD_memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    } else {
        /* by reference : only the caller's pointer is kept */
        cctx->localDict.dict = dict;
    }

    cctx->localDict.dictContentType = dictContentType;
    cctx->localDict.dictSize = dictSize;
    return 0;
}
1088e0c1b49fSNick Terrell
/* ZSTD_CCtx_loadDictionary_byReference() :
 * Shortcut to ZSTD_CCtx_loadDictionary_advanced(),
 * using ZSTD_dlm_byRef and ZSTD_dct_auto.
 */
size_t ZSTD_CCtx_loadDictionary_byReference(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    ZSTD_dictLoadMethod_e const loadMethod = ZSTD_dlm_byRef;
    ZSTD_dictContentType_e const contentType = ZSTD_dct_auto;
    return ZSTD_CCtx_loadDictionary_advanced(cctx, dict, dictSize, loadMethod, contentType);
}
1095e0c1b49fSNick Terrell
/*! ZSTD_CCtx_loadDictionary() :
 *  Convenience wrapper around ZSTD_CCtx_loadDictionary_advanced() which
 *  selects ZSTD_dlm_byCopy as load method and ZSTD_dct_auto as content type.
 *  With ZSTD_dlm_byCopy, the advanced loader duplicates `dict` into a buffer
 *  obtained from the context's custom allocator.
 * @return : the result of ZSTD_CCtx_loadDictionary_advanced(), unchanged. */
size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    ZSTD_dictLoadMethod_e const loadMethod = ZSTD_dlm_byCopy;
    ZSTD_dictContentType_e const contentType = ZSTD_dct_auto;
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, loadMethod, contentType);
}
1101e0c1b49fSNick Terrell
1102e0c1b49fSNick Terrell
ZSTD_CCtx_refCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict)1103e0c1b49fSNick Terrell size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
1104e0c1b49fSNick Terrell {
1105e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1106e0c1b49fSNick Terrell "Can't ref a dict when ctx not in init stage.");
1107e0c1b49fSNick Terrell /* Free the existing local cdict (if any) to save memory. */
1108e0c1b49fSNick Terrell ZSTD_clearAllDicts(cctx);
1109e0c1b49fSNick Terrell cctx->cdict = cdict;
1110e0c1b49fSNick Terrell return 0;
1111e0c1b49fSNick Terrell }
1112e0c1b49fSNick Terrell
ZSTD_CCtx_refThreadPool(ZSTD_CCtx * cctx,ZSTD_threadPool * pool)1113e0c1b49fSNick Terrell size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
1114e0c1b49fSNick Terrell {
1115e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1116e0c1b49fSNick Terrell "Can't ref a pool when ctx not in init stage.");
1117e0c1b49fSNick Terrell cctx->pool = pool;
1118e0c1b49fSNick Terrell return 0;
1119e0c1b49fSNick Terrell }
1120e0c1b49fSNick Terrell
/*! ZSTD_CCtx_refPrefix() :
 *  Same as ZSTD_CCtx_refPrefix_advanced(), with the prefix content type
 *  fixed to ZSTD_dct_rawContent.
 * @return : the result of ZSTD_CCtx_refPrefix_advanced(), unchanged. */
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    ZSTD_dictContentType_e const contentType = ZSTD_dct_rawContent;
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, contentType);
}
1125e0c1b49fSNick Terrell
/*! ZSTD_CCtx_refPrefix_advanced() :
 *  Registers `prefix` (pointer, size and content type) as the context's
 *  prefix dictionary. The prefix buffer is not copied.
 *  Any dictionary previously attached to the context is dropped first, so
 *  calling this with a NULL or empty prefix simply clears all dictionaries.
 *  Rejected (stage_wrong) unless the context is in init stage.
 * @return : 0 on success. */
size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a prefix when ctx not in init stage.");

    ZSTD_clearAllDicts(cctx);

    /* nothing to reference : leave the context without any dictionary */
    if (prefix == NULL || prefixSize == 0)
        return 0;

    cctx->prefixDict.dict = prefix;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dictContentType = dictContentType;
    return 0;
}
1139e0c1b49fSNick Terrell
1140e0c1b49fSNick Terrell /*! ZSTD_CCtx_reset() :
1141e0c1b49fSNick Terrell * Also dumps dictionary */
ZSTD_CCtx_reset(ZSTD_CCtx * cctx,ZSTD_ResetDirective reset)1142e0c1b49fSNick Terrell size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
1143e0c1b49fSNick Terrell {
1144e0c1b49fSNick Terrell if ( (reset == ZSTD_reset_session_only)
1145e0c1b49fSNick Terrell || (reset == ZSTD_reset_session_and_parameters) ) {
1146e0c1b49fSNick Terrell cctx->streamStage = zcss_init;
1147e0c1b49fSNick Terrell cctx->pledgedSrcSizePlusOne = 0;
1148e0c1b49fSNick Terrell }
1149e0c1b49fSNick Terrell if ( (reset == ZSTD_reset_parameters)
1150e0c1b49fSNick Terrell || (reset == ZSTD_reset_session_and_parameters) ) {
1151e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
1152e0c1b49fSNick Terrell "Can't reset parameters only when not in init stage.");
1153e0c1b49fSNick Terrell ZSTD_clearAllDicts(cctx);
1154e0c1b49fSNick Terrell return ZSTD_CCtxParams_reset(&cctx->requestedParams);
1155e0c1b49fSNick Terrell }
1156e0c1b49fSNick Terrell return 0;
1157e0c1b49fSNick Terrell }
1158e0c1b49fSNick Terrell
1159e0c1b49fSNick Terrell
1160e0c1b49fSNick Terrell /* ZSTD_checkCParams() :
1161e0c1b49fSNick Terrell control CParam values remain within authorized range.
1162e0c1b49fSNick Terrell @return : 0, or an error code if one value is beyond authorized range */
ZSTD_checkCParams(ZSTD_compressionParameters cParams)1163e0c1b49fSNick Terrell size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
1164e0c1b49fSNick Terrell {
1165e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
1166e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);
1167e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);
1168e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
1169e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);
1170e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
1171e0c1b49fSNick Terrell BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);
1172e0c1b49fSNick Terrell return 0;
1173e0c1b49fSNick Terrell }
1174e0c1b49fSNick Terrell
1175e0c1b49fSNick Terrell /* ZSTD_clampCParams() :
1176e0c1b49fSNick Terrell * make CParam values within valid range.
1177e0c1b49fSNick Terrell * @return : valid CParams */
1178e0c1b49fSNick Terrell static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)1179e0c1b49fSNick Terrell ZSTD_clampCParams(ZSTD_compressionParameters cParams)
1180e0c1b49fSNick Terrell {
1181e0c1b49fSNick Terrell # define CLAMP_TYPE(cParam, val, type) { \
1182e0c1b49fSNick Terrell ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
1183e0c1b49fSNick Terrell if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
1184e0c1b49fSNick Terrell else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
1185e0c1b49fSNick Terrell }
1186e0c1b49fSNick Terrell # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
1187e0c1b49fSNick Terrell CLAMP(ZSTD_c_windowLog, cParams.windowLog);
1188e0c1b49fSNick Terrell CLAMP(ZSTD_c_chainLog, cParams.chainLog);
1189e0c1b49fSNick Terrell CLAMP(ZSTD_c_hashLog, cParams.hashLog);
1190e0c1b49fSNick Terrell CLAMP(ZSTD_c_searchLog, cParams.searchLog);
1191e0c1b49fSNick Terrell CLAMP(ZSTD_c_minMatch, cParams.minMatch);
1192e0c1b49fSNick Terrell CLAMP(ZSTD_c_targetLength,cParams.targetLength);
1193e0c1b49fSNick Terrell CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
1194e0c1b49fSNick Terrell return cParams;
1195e0c1b49fSNick Terrell }
1196e0c1b49fSNick Terrell
1197e0c1b49fSNick Terrell /* ZSTD_cycleLog() :
1198e0c1b49fSNick Terrell * condition for correct operation : hashLog > 1 */
ZSTD_cycleLog(U32 hashLog,ZSTD_strategy strat)1199e0c1b49fSNick Terrell U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
1200e0c1b49fSNick Terrell {
1201e0c1b49fSNick Terrell U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
1202e0c1b49fSNick Terrell return hashLog - btScale;
1203e0c1b49fSNick Terrell }
1204e0c1b49fSNick Terrell
1205e0c1b49fSNick Terrell /* ZSTD_dictAndWindowLog() :
1206e0c1b49fSNick Terrell * Returns an adjusted window log that is large enough to fit the source and the dictionary.
1207e0c1b49fSNick Terrell * The zstd format says that the entire dictionary is valid if one byte of the dictionary
1208e0c1b49fSNick Terrell * is within the window. So the hashLog and chainLog should be large enough to reference both
1209e0c1b49fSNick Terrell * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
1210e0c1b49fSNick Terrell * the hashLog and windowLog.
1211e0c1b49fSNick Terrell * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
1212e0c1b49fSNick Terrell */
ZSTD_dictAndWindowLog(U32 windowLog,U64 srcSize,U64 dictSize)1213e0c1b49fSNick Terrell static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
1214e0c1b49fSNick Terrell {
1215e0c1b49fSNick Terrell const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
1216e0c1b49fSNick Terrell /* No dictionary ==> No change */
1217e0c1b49fSNick Terrell if (dictSize == 0) {
1218e0c1b49fSNick Terrell return windowLog;
1219e0c1b49fSNick Terrell }
1220e0c1b49fSNick Terrell assert(windowLog <= ZSTD_WINDOWLOG_MAX);
1221e0c1b49fSNick Terrell assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
1222e0c1b49fSNick Terrell {
1223e0c1b49fSNick Terrell U64 const windowSize = 1ULL << windowLog;
1224e0c1b49fSNick Terrell U64 const dictAndWindowSize = dictSize + windowSize;
1225e0c1b49fSNick Terrell /* If the window size is already large enough to fit both the source and the dictionary
1226e0c1b49fSNick Terrell * then just use the window size. Otherwise adjust so that it fits the dictionary and
1227e0c1b49fSNick Terrell * the window.
1228e0c1b49fSNick Terrell */
1229e0c1b49fSNick Terrell if (windowSize >= dictSize + srcSize) {
1230e0c1b49fSNick Terrell return windowLog; /* Window size large enough already */
1231e0c1b49fSNick Terrell } else if (dictAndWindowSize >= maxWindowSize) {
1232e0c1b49fSNick Terrell return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
1233e0c1b49fSNick Terrell } else {
1234e0c1b49fSNick Terrell return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
1235e0c1b49fSNick Terrell }
1236e0c1b49fSNick Terrell }
1237e0c1b49fSNick Terrell }
1238e0c1b49fSNick Terrell
1239e0c1b49fSNick Terrell /* ZSTD_adjustCParams_internal() :
1240e0c1b49fSNick Terrell * optimize `cPar` for a specified input (`srcSize` and `dictSize`).
1241e0c1b49fSNick Terrell * mostly downsize to reduce memory consumption and initialization latency.
1242e0c1b49fSNick Terrell * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
1243e0c1b49fSNick Terrell * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
1244e0c1b49fSNick Terrell * note : `srcSize==0` means 0!
1245e0c1b49fSNick Terrell * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
1246e0c1b49fSNick Terrell static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,unsigned long long srcSize,size_t dictSize,ZSTD_cParamMode_e mode)1247e0c1b49fSNick Terrell ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
1248e0c1b49fSNick Terrell unsigned long long srcSize,
1249e0c1b49fSNick Terrell size_t dictSize,
1250e0c1b49fSNick Terrell ZSTD_cParamMode_e mode)
1251e0c1b49fSNick Terrell {
1252e0c1b49fSNick Terrell const U64 minSrcSize = 513; /* (1<<9) + 1 */
1253e0c1b49fSNick Terrell const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
1254e0c1b49fSNick Terrell assert(ZSTD_checkCParams(cPar)==0);
1255e0c1b49fSNick Terrell
1256e0c1b49fSNick Terrell switch (mode) {
1257e0c1b49fSNick Terrell case ZSTD_cpm_unknown:
1258e0c1b49fSNick Terrell case ZSTD_cpm_noAttachDict:
1259e0c1b49fSNick Terrell /* If we don't know the source size, don't make any
1260e0c1b49fSNick Terrell * assumptions about it. We will already have selected
1261e0c1b49fSNick Terrell * smaller parameters if a dictionary is in use.
1262e0c1b49fSNick Terrell */
1263e0c1b49fSNick Terrell break;
1264e0c1b49fSNick Terrell case ZSTD_cpm_createCDict:
1265e0c1b49fSNick Terrell /* Assume a small source size when creating a dictionary
1266*2aa14b1aSNick Terrell * with an unknown source size.
1267e0c1b49fSNick Terrell */
1268e0c1b49fSNick Terrell if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1269e0c1b49fSNick Terrell srcSize = minSrcSize;
1270e0c1b49fSNick Terrell break;
1271e0c1b49fSNick Terrell case ZSTD_cpm_attachDict:
1272e0c1b49fSNick Terrell /* Dictionary has its own dedicated parameters which have
1273e0c1b49fSNick Terrell * already been selected. We are selecting parameters
1274e0c1b49fSNick Terrell * for only the source.
1275e0c1b49fSNick Terrell */
1276e0c1b49fSNick Terrell dictSize = 0;
1277e0c1b49fSNick Terrell break;
1278e0c1b49fSNick Terrell default:
1279e0c1b49fSNick Terrell assert(0);
1280e0c1b49fSNick Terrell break;
1281e0c1b49fSNick Terrell }
1282e0c1b49fSNick Terrell
1283e0c1b49fSNick Terrell /* resize windowLog if input is small enough, to use less memory */
1284e0c1b49fSNick Terrell if ( (srcSize < maxWindowResize)
1285e0c1b49fSNick Terrell && (dictSize < maxWindowResize) ) {
1286e0c1b49fSNick Terrell U32 const tSize = (U32)(srcSize + dictSize);
1287e0c1b49fSNick Terrell static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
1288e0c1b49fSNick Terrell U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
1289e0c1b49fSNick Terrell ZSTD_highbit32(tSize-1) + 1;
1290e0c1b49fSNick Terrell if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
1291e0c1b49fSNick Terrell }
1292e0c1b49fSNick Terrell if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
1293e0c1b49fSNick Terrell U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
1294e0c1b49fSNick Terrell U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
1295e0c1b49fSNick Terrell if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
1296e0c1b49fSNick Terrell if (cycleLog > dictAndWindowLog)
1297e0c1b49fSNick Terrell cPar.chainLog -= (cycleLog - dictAndWindowLog);
1298e0c1b49fSNick Terrell }
1299e0c1b49fSNick Terrell
1300e0c1b49fSNick Terrell if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
1301e0c1b49fSNick Terrell cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
1302e0c1b49fSNick Terrell
1303e0c1b49fSNick Terrell return cPar;
1304e0c1b49fSNick Terrell }
1305e0c1b49fSNick Terrell
1306e0c1b49fSNick Terrell ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,unsigned long long srcSize,size_t dictSize)1307e0c1b49fSNick Terrell ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
1308e0c1b49fSNick Terrell unsigned long long srcSize,
1309e0c1b49fSNick Terrell size_t dictSize)
1310e0c1b49fSNick Terrell {
1311e0c1b49fSNick Terrell cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
1312e0c1b49fSNick Terrell if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1313e0c1b49fSNick Terrell return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
1314e0c1b49fSNick Terrell }
1315e0c1b49fSNick Terrell
1316e0c1b49fSNick Terrell static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1317e0c1b49fSNick Terrell static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1318e0c1b49fSNick Terrell
/*! ZSTD_overrideCParams() :
 *  Copies into `cParams` every field of `overrides` that is non-zero.
 *  A zero field in `overrides` means "no override" : the corresponding
 *  field of `cParams` keeps its current value. */
static void ZSTD_overrideCParams(
              ZSTD_compressionParameters* cParams,
        const ZSTD_compressionParameters* overrides)
{
    if (overrides->windowLog) {
        cParams->windowLog = overrides->windowLog;
    }
    if (overrides->hashLog) {
        cParams->hashLog = overrides->hashLog;
    }
    if (overrides->chainLog) {
        cParams->chainLog = overrides->chainLog;
    }
    if (overrides->searchLog) {
        cParams->searchLog = overrides->searchLog;
    }
    if (overrides->minMatch) {
        cParams->minMatch = overrides->minMatch;
    }
    if (overrides->targetLength) {
        cParams->targetLength = overrides->targetLength;
    }
    if (overrides->strategy) {
        cParams->strategy = overrides->strategy;
    }
}
1331e0c1b49fSNick Terrell
ZSTD_getCParamsFromCCtxParams(const ZSTD_CCtx_params * CCtxParams,U64 srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)1332e0c1b49fSNick Terrell ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1333e0c1b49fSNick Terrell const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
1334e0c1b49fSNick Terrell {
1335e0c1b49fSNick Terrell ZSTD_compressionParameters cParams;
1336e0c1b49fSNick Terrell if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
1337e0c1b49fSNick Terrell srcSizeHint = CCtxParams->srcSizeHint;
1338e0c1b49fSNick Terrell }
1339e0c1b49fSNick Terrell cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
1340*2aa14b1aSNick Terrell if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
1341e0c1b49fSNick Terrell ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
1342e0c1b49fSNick Terrell assert(!ZSTD_checkCParams(cParams));
1343e0c1b49fSNick Terrell /* srcSizeHint == 0 means 0 */
1344e0c1b49fSNick Terrell return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
1345e0c1b49fSNick Terrell }
1346e0c1b49fSNick Terrell
1347e0c1b49fSNick Terrell static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters * const cParams,const ZSTD_paramSwitch_e useRowMatchFinder,const U32 enableDedicatedDictSearch,const U32 forCCtx)1348e0c1b49fSNick Terrell ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
1349*2aa14b1aSNick Terrell const ZSTD_paramSwitch_e useRowMatchFinder,
1350*2aa14b1aSNick Terrell const U32 enableDedicatedDictSearch,
1351e0c1b49fSNick Terrell const U32 forCCtx)
1352e0c1b49fSNick Terrell {
1353*2aa14b1aSNick Terrell /* chain table size should be 0 for fast or row-hash strategies */
1354*2aa14b1aSNick Terrell size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
1355*2aa14b1aSNick Terrell ? ((size_t)1 << cParams->chainLog)
1356*2aa14b1aSNick Terrell : 0;
1357e0c1b49fSNick Terrell size_t const hSize = ((size_t)1) << cParams->hashLog;
1358e0c1b49fSNick Terrell U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1359e0c1b49fSNick Terrell size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1360e0c1b49fSNick Terrell /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
1361e0c1b49fSNick Terrell * surrounded by redzones in ASAN. */
1362e0c1b49fSNick Terrell size_t const tableSpace = chainSize * sizeof(U32)
1363e0c1b49fSNick Terrell + hSize * sizeof(U32)
1364e0c1b49fSNick Terrell + h3Size * sizeof(U32);
1365e0c1b49fSNick Terrell size_t const optPotentialSpace =
1366*2aa14b1aSNick Terrell ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
1367*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
1368*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
1369*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
1370*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
1371*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1372*2aa14b1aSNick Terrell size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
1373*2aa14b1aSNick Terrell ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
1374*2aa14b1aSNick Terrell : 0;
1375e0c1b49fSNick Terrell size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
1376e0c1b49fSNick Terrell ? optPotentialSpace
1377e0c1b49fSNick Terrell : 0;
1378*2aa14b1aSNick Terrell size_t const slackSpace = ZSTD_cwksp_slack_space_required();
1379*2aa14b1aSNick Terrell
1380*2aa14b1aSNick Terrell /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
1381*2aa14b1aSNick Terrell ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
1382*2aa14b1aSNick Terrell assert(useRowMatchFinder != ZSTD_ps_auto);
1383*2aa14b1aSNick Terrell
1384e0c1b49fSNick Terrell DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
1385e0c1b49fSNick Terrell (U32)chainSize, (U32)hSize, (U32)h3Size);
1386*2aa14b1aSNick Terrell return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
1387e0c1b49fSNick Terrell }
1388e0c1b49fSNick Terrell
ZSTD_estimateCCtxSize_usingCCtxParams_internal(const ZSTD_compressionParameters * cParams,const ldmParams_t * ldmParams,const int isStatic,const ZSTD_paramSwitch_e useRowMatchFinder,const size_t buffInSize,const size_t buffOutSize,const U64 pledgedSrcSize)1389e0c1b49fSNick Terrell static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1390e0c1b49fSNick Terrell const ZSTD_compressionParameters* cParams,
1391e0c1b49fSNick Terrell const ldmParams_t* ldmParams,
1392e0c1b49fSNick Terrell const int isStatic,
1393*2aa14b1aSNick Terrell const ZSTD_paramSwitch_e useRowMatchFinder,
1394e0c1b49fSNick Terrell const size_t buffInSize,
1395e0c1b49fSNick Terrell const size_t buffOutSize,
1396e0c1b49fSNick Terrell const U64 pledgedSrcSize)
1397e0c1b49fSNick Terrell {
1398*2aa14b1aSNick Terrell size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
1399e0c1b49fSNick Terrell size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1400e0c1b49fSNick Terrell U32 const divider = (cParams->minMatch==3) ? 3 : 4;
1401e0c1b49fSNick Terrell size_t const maxNbSeq = blockSize / divider;
1402e0c1b49fSNick Terrell size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
1403*2aa14b1aSNick Terrell + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
1404e0c1b49fSNick Terrell + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
1405e0c1b49fSNick Terrell size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
1406e0c1b49fSNick Terrell size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
1407*2aa14b1aSNick Terrell size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
1408e0c1b49fSNick Terrell
1409e0c1b49fSNick Terrell size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
1410e0c1b49fSNick Terrell size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
1411*2aa14b1aSNick Terrell size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
1412*2aa14b1aSNick Terrell ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
1413e0c1b49fSNick Terrell
1414e0c1b49fSNick Terrell
1415e0c1b49fSNick Terrell size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
1416e0c1b49fSNick Terrell + ZSTD_cwksp_alloc_size(buffOutSize);
1417e0c1b49fSNick Terrell
1418e0c1b49fSNick Terrell size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
1419e0c1b49fSNick Terrell
1420e0c1b49fSNick Terrell size_t const neededSpace =
1421e0c1b49fSNick Terrell cctxSpace +
1422e0c1b49fSNick Terrell entropySpace +
1423e0c1b49fSNick Terrell blockStateSpace +
1424e0c1b49fSNick Terrell ldmSpace +
1425e0c1b49fSNick Terrell ldmSeqSpace +
1426e0c1b49fSNick Terrell matchStateSize +
1427e0c1b49fSNick Terrell tokenSpace +
1428e0c1b49fSNick Terrell bufferSpace;
1429e0c1b49fSNick Terrell
1430e0c1b49fSNick Terrell DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
1431e0c1b49fSNick Terrell return neededSpace;
1432e0c1b49fSNick Terrell }
1433e0c1b49fSNick Terrell
ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params * params)1434e0c1b49fSNick Terrell size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
1435e0c1b49fSNick Terrell {
1436e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams =
1437e0c1b49fSNick Terrell ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1438*2aa14b1aSNick Terrell ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
1439*2aa14b1aSNick Terrell &cParams);
1440e0c1b49fSNick Terrell
1441e0c1b49fSNick Terrell RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
1442e0c1b49fSNick Terrell /* estimateCCtxSize is for one-shot compression. So no buffers should
1443e0c1b49fSNick Terrell * be needed. However, we still allocate two 0-sized buffers, which can
1444e0c1b49fSNick Terrell * take space under ASAN. */
1445e0c1b49fSNick Terrell return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1446*2aa14b1aSNick Terrell &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
1447e0c1b49fSNick Terrell }
1448e0c1b49fSNick Terrell
ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)1449e0c1b49fSNick Terrell size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
1450e0c1b49fSNick Terrell {
1451*2aa14b1aSNick Terrell ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
1452*2aa14b1aSNick Terrell if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
1453*2aa14b1aSNick Terrell /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
1454*2aa14b1aSNick Terrell size_t noRowCCtxSize;
1455*2aa14b1aSNick Terrell size_t rowCCtxSize;
1456*2aa14b1aSNick Terrell initialParams.useRowMatchFinder = ZSTD_ps_disable;
1457*2aa14b1aSNick Terrell noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
1458*2aa14b1aSNick Terrell initialParams.useRowMatchFinder = ZSTD_ps_enable;
1459*2aa14b1aSNick Terrell rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
1460*2aa14b1aSNick Terrell return MAX(noRowCCtxSize, rowCCtxSize);
1461*2aa14b1aSNick Terrell } else {
1462*2aa14b1aSNick Terrell return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
1463*2aa14b1aSNick Terrell }
1464e0c1b49fSNick Terrell }
1465e0c1b49fSNick Terrell
ZSTD_estimateCCtxSize_internal(int compressionLevel)1466e0c1b49fSNick Terrell static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
1467e0c1b49fSNick Terrell {
1468e0c1b49fSNick Terrell int tier = 0;
1469e0c1b49fSNick Terrell size_t largestSize = 0;
1470e0c1b49fSNick Terrell static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
1471e0c1b49fSNick Terrell for (; tier < 4; ++tier) {
1472e0c1b49fSNick Terrell /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
1473e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
1474e0c1b49fSNick Terrell largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
1475e0c1b49fSNick Terrell }
1476e0c1b49fSNick Terrell return largestSize;
1477e0c1b49fSNick Terrell }
1478e0c1b49fSNick Terrell
ZSTD_estimateCCtxSize(int compressionLevel)1479e0c1b49fSNick Terrell size_t ZSTD_estimateCCtxSize(int compressionLevel)
1480e0c1b49fSNick Terrell {
1481e0c1b49fSNick Terrell int level;
1482e0c1b49fSNick Terrell size_t memBudget = 0;
1483e0c1b49fSNick Terrell for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
1484e0c1b49fSNick Terrell /* Ensure monotonically increasing memory usage as compression level increases */
1485e0c1b49fSNick Terrell size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
1486e0c1b49fSNick Terrell if (newMB > memBudget) memBudget = newMB;
1487e0c1b49fSNick Terrell }
1488e0c1b49fSNick Terrell return memBudget;
1489e0c1b49fSNick Terrell }
1490e0c1b49fSNick Terrell
ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params * params)1491e0c1b49fSNick Terrell size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
1492e0c1b49fSNick Terrell {
1493e0c1b49fSNick Terrell RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
1494e0c1b49fSNick Terrell { ZSTD_compressionParameters const cParams =
1495e0c1b49fSNick Terrell ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1496e0c1b49fSNick Terrell size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
1497e0c1b49fSNick Terrell size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
1498e0c1b49fSNick Terrell ? ((size_t)1 << cParams.windowLog) + blockSize
1499e0c1b49fSNick Terrell : 0;
1500e0c1b49fSNick Terrell size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
1501e0c1b49fSNick Terrell ? ZSTD_compressBound(blockSize) + 1
1502e0c1b49fSNick Terrell : 0;
1503*2aa14b1aSNick Terrell ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams);
1504e0c1b49fSNick Terrell
1505e0c1b49fSNick Terrell return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1506*2aa14b1aSNick Terrell &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
1507e0c1b49fSNick Terrell ZSTD_CONTENTSIZE_UNKNOWN);
1508e0c1b49fSNick Terrell }
1509e0c1b49fSNick Terrell }
1510e0c1b49fSNick Terrell
ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)1511e0c1b49fSNick Terrell size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
1512e0c1b49fSNick Terrell {
1513*2aa14b1aSNick Terrell ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
1514*2aa14b1aSNick Terrell if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
1515*2aa14b1aSNick Terrell /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
1516*2aa14b1aSNick Terrell size_t noRowCCtxSize;
1517*2aa14b1aSNick Terrell size_t rowCCtxSize;
1518*2aa14b1aSNick Terrell initialParams.useRowMatchFinder = ZSTD_ps_disable;
1519*2aa14b1aSNick Terrell noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1520*2aa14b1aSNick Terrell initialParams.useRowMatchFinder = ZSTD_ps_enable;
1521*2aa14b1aSNick Terrell rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1522*2aa14b1aSNick Terrell return MAX(noRowCCtxSize, rowCCtxSize);
1523*2aa14b1aSNick Terrell } else {
1524*2aa14b1aSNick Terrell return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
1525*2aa14b1aSNick Terrell }
1526e0c1b49fSNick Terrell }
1527e0c1b49fSNick Terrell
ZSTD_estimateCStreamSize_internal(int compressionLevel)1528e0c1b49fSNick Terrell static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
1529e0c1b49fSNick Terrell {
1530e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
1531e0c1b49fSNick Terrell return ZSTD_estimateCStreamSize_usingCParams(cParams);
1532e0c1b49fSNick Terrell }
1533e0c1b49fSNick Terrell
ZSTD_estimateCStreamSize(int compressionLevel)1534e0c1b49fSNick Terrell size_t ZSTD_estimateCStreamSize(int compressionLevel)
1535e0c1b49fSNick Terrell {
1536e0c1b49fSNick Terrell int level;
1537e0c1b49fSNick Terrell size_t memBudget = 0;
1538e0c1b49fSNick Terrell for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
1539e0c1b49fSNick Terrell size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
1540e0c1b49fSNick Terrell if (newMB > memBudget) memBudget = newMB;
1541e0c1b49fSNick Terrell }
1542e0c1b49fSNick Terrell return memBudget;
1543e0c1b49fSNick Terrell }
1544e0c1b49fSNick Terrell
1545e0c1b49fSNick Terrell /* ZSTD_getFrameProgression():
1546e0c1b49fSNick Terrell * tells how much data has been consumed (input) and produced (output) for current frame.
1547e0c1b49fSNick Terrell * able to count progression inside worker threads (non-blocking mode).
1548e0c1b49fSNick Terrell */
ZSTD_getFrameProgression(const ZSTD_CCtx * cctx)1549e0c1b49fSNick Terrell ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
1550e0c1b49fSNick Terrell {
1551e0c1b49fSNick Terrell { ZSTD_frameProgression fp;
1552e0c1b49fSNick Terrell size_t const buffered = (cctx->inBuff == NULL) ? 0 :
1553e0c1b49fSNick Terrell cctx->inBuffPos - cctx->inToCompress;
1554e0c1b49fSNick Terrell if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
1555e0c1b49fSNick Terrell assert(buffered <= ZSTD_BLOCKSIZE_MAX);
1556e0c1b49fSNick Terrell fp.ingested = cctx->consumedSrcSize + buffered;
1557e0c1b49fSNick Terrell fp.consumed = cctx->consumedSrcSize;
1558e0c1b49fSNick Terrell fp.produced = cctx->producedCSize;
1559e0c1b49fSNick Terrell fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */
1560e0c1b49fSNick Terrell fp.currentJobID = 0;
1561e0c1b49fSNick Terrell fp.nbActiveWorkers = 0;
1562e0c1b49fSNick Terrell return fp;
1563e0c1b49fSNick Terrell } }
1564e0c1b49fSNick Terrell
1565e0c1b49fSNick Terrell /*! ZSTD_toFlushNow()
1566e0c1b49fSNick Terrell * Only useful for multithreading scenarios currently (nbWorkers >= 1).
1567e0c1b49fSNick Terrell */
ZSTD_toFlushNow(ZSTD_CCtx * cctx)1568e0c1b49fSNick Terrell size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
1569e0c1b49fSNick Terrell {
1570e0c1b49fSNick Terrell (void)cctx;
1571e0c1b49fSNick Terrell return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
1572e0c1b49fSNick Terrell }
1573e0c1b49fSNick Terrell
/* ZSTD_assertEqualCParams() :
 * Debug helper : asserts that every field of the two compression-parameter
 * sets (windowLog, chainLog, hashLog, searchLog, minMatch, targetLength,
 * strategy) is identical. Has no effect besides the assert() calls. */
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    /* keep the parameters "used" when assert() compiles to nothing */
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}
1587e0c1b49fSNick Terrell
/* ZSTD_reset_compressedBlockState() :
 * Puts a compressed-block state back to its starting point :
 * repcodes are reloaded from repStartValue[], and the Huffman and the three
 * FSE (offcode, matchlength, litlength) repeat modes are set to "none". */
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i) {
        bs->rep[i] = repStartValue[i];
    }
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}
1598e0c1b49fSNick Terrell
1599e0c1b49fSNick Terrell /*! ZSTD_invalidateMatchState()
1600e0c1b49fSNick Terrell * Invalidate all the matches in the match finder tables.
1601e0c1b49fSNick Terrell * Requires nextSrc and base to be set (can be NULL).
1602e0c1b49fSNick Terrell */
ZSTD_invalidateMatchState(ZSTD_matchState_t * ms)1603e0c1b49fSNick Terrell static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
1604e0c1b49fSNick Terrell {
1605e0c1b49fSNick Terrell ZSTD_window_clear(&ms->window);
1606e0c1b49fSNick Terrell
1607e0c1b49fSNick Terrell ms->nextToUpdate = ms->window.dictLimit;
1608e0c1b49fSNick Terrell ms->loadedDictEnd = 0;
1609e0c1b49fSNick Terrell ms->opt.litLengthSum = 0; /* force reset of btopt stats */
1610e0c1b49fSNick Terrell ms->dictMatchState = NULL;
1611e0c1b49fSNick Terrell }
1612e0c1b49fSNick Terrell
/*
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;
1624e0c1b49fSNick Terrell
/*
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;
1634e0c1b49fSNick Terrell
/*
 * Tells a matchState reset which kind of object owns the matchState :
 * a compression dictionary (ZSTD_resetTarget_CDict) or a compression
 * context (ZSTD_resetTarget_CCtx).
 */
typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
1639e0c1b49fSNick Terrell
1640*2aa14b1aSNick Terrell
1641e0c1b49fSNick Terrell static size_t
ZSTD_reset_matchState(ZSTD_matchState_t * ms,ZSTD_cwksp * ws,const ZSTD_compressionParameters * cParams,const ZSTD_paramSwitch_e useRowMatchFinder,const ZSTD_compResetPolicy_e crp,const ZSTD_indexResetPolicy_e forceResetIndex,const ZSTD_resetTarget_e forWho)1642e0c1b49fSNick Terrell ZSTD_reset_matchState(ZSTD_matchState_t* ms,
1643e0c1b49fSNick Terrell ZSTD_cwksp* ws,
1644e0c1b49fSNick Terrell const ZSTD_compressionParameters* cParams,
1645*2aa14b1aSNick Terrell const ZSTD_paramSwitch_e useRowMatchFinder,
1646e0c1b49fSNick Terrell const ZSTD_compResetPolicy_e crp,
1647e0c1b49fSNick Terrell const ZSTD_indexResetPolicy_e forceResetIndex,
1648e0c1b49fSNick Terrell const ZSTD_resetTarget_e forWho)
1649e0c1b49fSNick Terrell {
1650*2aa14b1aSNick Terrell /* disable chain table allocation for fast or row-based strategies */
1651*2aa14b1aSNick Terrell size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
1652*2aa14b1aSNick Terrell ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
1653*2aa14b1aSNick Terrell ? ((size_t)1 << cParams->chainLog)
1654*2aa14b1aSNick Terrell : 0;
1655e0c1b49fSNick Terrell size_t const hSize = ((size_t)1) << cParams->hashLog;
1656e0c1b49fSNick Terrell U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1657e0c1b49fSNick Terrell size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1658e0c1b49fSNick Terrell
1659e0c1b49fSNick Terrell DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
1660*2aa14b1aSNick Terrell assert(useRowMatchFinder != ZSTD_ps_auto);
1661e0c1b49fSNick Terrell if (forceResetIndex == ZSTDirp_reset) {
1662e0c1b49fSNick Terrell ZSTD_window_init(&ms->window);
1663e0c1b49fSNick Terrell ZSTD_cwksp_mark_tables_dirty(ws);
1664e0c1b49fSNick Terrell }
1665e0c1b49fSNick Terrell
1666e0c1b49fSNick Terrell ms->hashLog3 = hashLog3;
1667e0c1b49fSNick Terrell
1668e0c1b49fSNick Terrell ZSTD_invalidateMatchState(ms);
1669e0c1b49fSNick Terrell
1670e0c1b49fSNick Terrell assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
1671e0c1b49fSNick Terrell
1672e0c1b49fSNick Terrell ZSTD_cwksp_clear_tables(ws);
1673e0c1b49fSNick Terrell
1674e0c1b49fSNick Terrell DEBUGLOG(5, "reserving table space");
1675e0c1b49fSNick Terrell /* table Space */
1676e0c1b49fSNick Terrell ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
1677e0c1b49fSNick Terrell ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
1678e0c1b49fSNick Terrell ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
1679e0c1b49fSNick Terrell RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
1680e0c1b49fSNick Terrell "failed a workspace allocation in ZSTD_reset_matchState");
1681e0c1b49fSNick Terrell
1682e0c1b49fSNick Terrell DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
1683e0c1b49fSNick Terrell if (crp!=ZSTDcrp_leaveDirty) {
1684e0c1b49fSNick Terrell /* reset tables only */
1685e0c1b49fSNick Terrell ZSTD_cwksp_clean_tables(ws);
1686e0c1b49fSNick Terrell }
1687e0c1b49fSNick Terrell
1688e0c1b49fSNick Terrell /* opt parser space */
1689e0c1b49fSNick Terrell if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
1690e0c1b49fSNick Terrell DEBUGLOG(4, "reserving optimal parser space");
1691e0c1b49fSNick Terrell ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
1692e0c1b49fSNick Terrell ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
1693e0c1b49fSNick Terrell ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
1694e0c1b49fSNick Terrell ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
1695e0c1b49fSNick Terrell ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
1696e0c1b49fSNick Terrell ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1697e0c1b49fSNick Terrell }
1698e0c1b49fSNick Terrell
1699*2aa14b1aSNick Terrell if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
1700*2aa14b1aSNick Terrell { /* Row match finder needs an additional table of hashes ("tags") */
1701*2aa14b1aSNick Terrell size_t const tagTableSize = hSize*sizeof(U16);
1702*2aa14b1aSNick Terrell ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
1703*2aa14b1aSNick Terrell if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
1704*2aa14b1aSNick Terrell }
1705*2aa14b1aSNick Terrell { /* Switch to 32-entry rows if searchLog is 5 (or more) */
1706*2aa14b1aSNick Terrell U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
1707*2aa14b1aSNick Terrell assert(cParams->hashLog >= rowLog);
1708*2aa14b1aSNick Terrell ms->rowHashLog = cParams->hashLog - rowLog;
1709*2aa14b1aSNick Terrell }
1710*2aa14b1aSNick Terrell }
1711*2aa14b1aSNick Terrell
1712e0c1b49fSNick Terrell ms->cParams = *cParams;
1713e0c1b49fSNick Terrell
1714e0c1b49fSNick Terrell RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
1715e0c1b49fSNick Terrell "failed a workspace allocation in ZSTD_reset_matchState");
1716e0c1b49fSNick Terrell return 0;
1717e0c1b49fSNick Terrell }
1718e0c1b49fSNick Terrell
1719e0c1b49fSNick Terrell /* ZSTD_indexTooCloseToMax() :
1720e0c1b49fSNick Terrell * minor optimization : prefer memset() rather than reduceIndex()
1721e0c1b49fSNick Terrell * which is measurably slow in some circumstances (reported for Visual Studio).
1722e0c1b49fSNick Terrell * Works when re-using a context for a lot of smallish inputs :
1723e0c1b49fSNick Terrell * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
1724e0c1b49fSNick Terrell * memset() will be triggered before reduceIndex().
1725e0c1b49fSNick Terrell */
1726e0c1b49fSNick Terrell #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
ZSTD_indexTooCloseToMax(ZSTD_window_t w)1727e0c1b49fSNick Terrell static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
1728e0c1b49fSNick Terrell {
1729e0c1b49fSNick Terrell return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
1730e0c1b49fSNick Terrell }
1731e0c1b49fSNick Terrell
1732*2aa14b1aSNick Terrell /* ZSTD_dictTooBig():
1733*2aa14b1aSNick Terrell * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
1734*2aa14b1aSNick Terrell * one go generically. So we ensure that in that case we reset the tables to zero,
1735*2aa14b1aSNick Terrell * so that we can load as much of the dictionary as possible.
1736*2aa14b1aSNick Terrell */
ZSTD_dictTooBig(size_t const loadedDictSize)1737*2aa14b1aSNick Terrell static int ZSTD_dictTooBig(size_t const loadedDictSize)
1738*2aa14b1aSNick Terrell {
1739*2aa14b1aSNick Terrell return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
1740*2aa14b1aSNick Terrell }
1741*2aa14b1aSNick Terrell
1742e0c1b49fSNick Terrell /*! ZSTD_resetCCtx_internal() :
1743*2aa14b1aSNick Terrell * @param loadedDictSize The size of the dictionary to be loaded
1744*2aa14b1aSNick Terrell * into the context, if any. If no dictionary is used, or the
1745*2aa14b1aSNick Terrell * dictionary is being attached / copied, then pass 0.
1746*2aa14b1aSNick Terrell * note : `params` are assumed fully validated at this stage.
1747*2aa14b1aSNick Terrell */
ZSTD_resetCCtx_internal(ZSTD_CCtx * zc,ZSTD_CCtx_params const * params,U64 const pledgedSrcSize,size_t const loadedDictSize,ZSTD_compResetPolicy_e const crp,ZSTD_buffered_policy_e const zbuff)1748e0c1b49fSNick Terrell static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
1749*2aa14b1aSNick Terrell ZSTD_CCtx_params const* params,
1750e0c1b49fSNick Terrell U64 const pledgedSrcSize,
1751*2aa14b1aSNick Terrell size_t const loadedDictSize,
1752e0c1b49fSNick Terrell ZSTD_compResetPolicy_e const crp,
1753e0c1b49fSNick Terrell ZSTD_buffered_policy_e const zbuff)
1754e0c1b49fSNick Terrell {
1755e0c1b49fSNick Terrell ZSTD_cwksp* const ws = &zc->workspace;
1756*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
1757*2aa14b1aSNick Terrell (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
1758*2aa14b1aSNick Terrell assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
1759e0c1b49fSNick Terrell
1760e0c1b49fSNick Terrell zc->isFirstBlock = 1;
1761e0c1b49fSNick Terrell
1762*2aa14b1aSNick Terrell /* Set applied params early so we can modify them for LDM,
1763*2aa14b1aSNick Terrell * and point params at the applied params.
1764*2aa14b1aSNick Terrell */
1765*2aa14b1aSNick Terrell zc->appliedParams = *params;
1766*2aa14b1aSNick Terrell params = &zc->appliedParams;
1767*2aa14b1aSNick Terrell
1768*2aa14b1aSNick Terrell assert(params->useRowMatchFinder != ZSTD_ps_auto);
1769*2aa14b1aSNick Terrell assert(params->useBlockSplitter != ZSTD_ps_auto);
1770*2aa14b1aSNick Terrell assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
1771*2aa14b1aSNick Terrell if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1772e0c1b49fSNick Terrell /* Adjust long distance matching parameters */
1773*2aa14b1aSNick Terrell ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams);
1774*2aa14b1aSNick Terrell assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
1775*2aa14b1aSNick Terrell assert(params->ldmParams.hashRateLog < 32);
1776e0c1b49fSNick Terrell }
1777e0c1b49fSNick Terrell
1778*2aa14b1aSNick Terrell { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
1779e0c1b49fSNick Terrell size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1780*2aa14b1aSNick Terrell U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
1781e0c1b49fSNick Terrell size_t const maxNbSeq = blockSize / divider;
1782*2aa14b1aSNick Terrell size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
1783e0c1b49fSNick Terrell ? ZSTD_compressBound(blockSize) + 1
1784e0c1b49fSNick Terrell : 0;
1785*2aa14b1aSNick Terrell size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
1786e0c1b49fSNick Terrell ? windowSize + blockSize
1787e0c1b49fSNick Terrell : 0;
1788*2aa14b1aSNick Terrell size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
1789e0c1b49fSNick Terrell
1790e0c1b49fSNick Terrell int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
1791*2aa14b1aSNick Terrell int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
1792e0c1b49fSNick Terrell ZSTD_indexResetPolicy_e needsIndexReset =
1793*2aa14b1aSNick Terrell (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
1794e0c1b49fSNick Terrell
1795e0c1b49fSNick Terrell size_t const neededSpace =
1796e0c1b49fSNick Terrell ZSTD_estimateCCtxSize_usingCCtxParams_internal(
1797*2aa14b1aSNick Terrell ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
1798e0c1b49fSNick Terrell buffInSize, buffOutSize, pledgedSrcSize);
1799*2aa14b1aSNick Terrell int resizeWorkspace;
1800*2aa14b1aSNick Terrell
1801e0c1b49fSNick Terrell FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
1802e0c1b49fSNick Terrell
1803e0c1b49fSNick Terrell if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
1804e0c1b49fSNick Terrell
1805*2aa14b1aSNick Terrell { /* Check if workspace is large enough, alloc a new one if needed */
1806e0c1b49fSNick Terrell int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
1807e0c1b49fSNick Terrell int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
1808*2aa14b1aSNick Terrell resizeWorkspace = workspaceTooSmall || workspaceWasteful;
1809e0c1b49fSNick Terrell DEBUGLOG(4, "Need %zu B workspace", neededSpace);
1810e0c1b49fSNick Terrell DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1811e0c1b49fSNick Terrell
1812*2aa14b1aSNick Terrell if (resizeWorkspace) {
1813e0c1b49fSNick Terrell DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
1814e0c1b49fSNick Terrell ZSTD_cwksp_sizeof(ws) >> 10,
1815e0c1b49fSNick Terrell neededSpace >> 10);
1816e0c1b49fSNick Terrell
1817e0c1b49fSNick Terrell RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
1818e0c1b49fSNick Terrell
1819e0c1b49fSNick Terrell needsIndexReset = ZSTDirp_reset;
1820e0c1b49fSNick Terrell
1821e0c1b49fSNick Terrell ZSTD_cwksp_free(ws, zc->customMem);
1822e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
1823e0c1b49fSNick Terrell
1824e0c1b49fSNick Terrell DEBUGLOG(5, "reserving object space");
1825e0c1b49fSNick Terrell /* Statically sized space.
1826e0c1b49fSNick Terrell * entropyWorkspace never moves,
1827e0c1b49fSNick Terrell * though prev/next block swap places */
1828e0c1b49fSNick Terrell assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
1829e0c1b49fSNick Terrell zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1830e0c1b49fSNick Terrell RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
1831e0c1b49fSNick Terrell zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1832e0c1b49fSNick Terrell RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
1833e0c1b49fSNick Terrell zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
1834*2aa14b1aSNick Terrell RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
1835e0c1b49fSNick Terrell } }
1836e0c1b49fSNick Terrell
1837e0c1b49fSNick Terrell ZSTD_cwksp_clear(ws);
1838e0c1b49fSNick Terrell
1839e0c1b49fSNick Terrell /* init params */
1840*2aa14b1aSNick Terrell zc->blockState.matchState.cParams = params->cParams;
1841e0c1b49fSNick Terrell zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1842e0c1b49fSNick Terrell zc->consumedSrcSize = 0;
1843e0c1b49fSNick Terrell zc->producedCSize = 0;
1844e0c1b49fSNick Terrell if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1845e0c1b49fSNick Terrell zc->appliedParams.fParams.contentSizeFlag = 0;
1846e0c1b49fSNick Terrell DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1847e0c1b49fSNick Terrell (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1848e0c1b49fSNick Terrell zc->blockSize = blockSize;
1849e0c1b49fSNick Terrell
1850e0c1b49fSNick Terrell xxh64_reset(&zc->xxhState, 0);
1851e0c1b49fSNick Terrell zc->stage = ZSTDcs_init;
1852e0c1b49fSNick Terrell zc->dictID = 0;
1853e0c1b49fSNick Terrell zc->dictContentSize = 0;
1854e0c1b49fSNick Terrell
1855e0c1b49fSNick Terrell ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1856e0c1b49fSNick Terrell
1857e0c1b49fSNick Terrell /* ZSTD_wildcopy() is used to copy into the literals buffer,
1858e0c1b49fSNick Terrell * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1859e0c1b49fSNick Terrell */
1860e0c1b49fSNick Terrell zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
1861e0c1b49fSNick Terrell zc->seqStore.maxNbLit = blockSize;
1862e0c1b49fSNick Terrell
1863e0c1b49fSNick Terrell /* buffers */
1864e0c1b49fSNick Terrell zc->bufferedPolicy = zbuff;
1865e0c1b49fSNick Terrell zc->inBuffSize = buffInSize;
1866e0c1b49fSNick Terrell zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
1867e0c1b49fSNick Terrell zc->outBuffSize = buffOutSize;
1868e0c1b49fSNick Terrell zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
1869e0c1b49fSNick Terrell
1870e0c1b49fSNick Terrell /* ldm bucketOffsets table */
1871*2aa14b1aSNick Terrell if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1872e0c1b49fSNick Terrell /* TODO: avoid memset? */
1873e0c1b49fSNick Terrell size_t const numBuckets =
1874*2aa14b1aSNick Terrell ((size_t)1) << (params->ldmParams.hashLog -
1875*2aa14b1aSNick Terrell params->ldmParams.bucketSizeLog);
1876e0c1b49fSNick Terrell zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
1877e0c1b49fSNick Terrell ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
1878e0c1b49fSNick Terrell }
1879e0c1b49fSNick Terrell
1880e0c1b49fSNick Terrell /* sequences storage */
1881e0c1b49fSNick Terrell ZSTD_referenceExternalSequences(zc, NULL, 0);
1882e0c1b49fSNick Terrell zc->seqStore.maxNbSeq = maxNbSeq;
1883e0c1b49fSNick Terrell zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1884e0c1b49fSNick Terrell zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1885e0c1b49fSNick Terrell zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1886e0c1b49fSNick Terrell zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
1887e0c1b49fSNick Terrell
1888e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_reset_matchState(
1889e0c1b49fSNick Terrell &zc->blockState.matchState,
1890e0c1b49fSNick Terrell ws,
1891*2aa14b1aSNick Terrell ¶ms->cParams,
1892*2aa14b1aSNick Terrell params->useRowMatchFinder,
1893e0c1b49fSNick Terrell crp,
1894e0c1b49fSNick Terrell needsIndexReset,
1895e0c1b49fSNick Terrell ZSTD_resetTarget_CCtx), "");
1896e0c1b49fSNick Terrell
1897e0c1b49fSNick Terrell /* ldm hash table */
1898*2aa14b1aSNick Terrell if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1899e0c1b49fSNick Terrell /* TODO: avoid memset? */
1900*2aa14b1aSNick Terrell size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
1901e0c1b49fSNick Terrell zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
1902e0c1b49fSNick Terrell ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
1903e0c1b49fSNick Terrell zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
1904e0c1b49fSNick Terrell zc->maxNbLdmSequences = maxNbLdmSeq;
1905e0c1b49fSNick Terrell
1906e0c1b49fSNick Terrell ZSTD_window_init(&zc->ldmState.window);
1907e0c1b49fSNick Terrell zc->ldmState.loadedDictEnd = 0;
1908e0c1b49fSNick Terrell }
1909e0c1b49fSNick Terrell
1910e0c1b49fSNick Terrell DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
1911*2aa14b1aSNick Terrell assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
1912*2aa14b1aSNick Terrell
1913e0c1b49fSNick Terrell zc->initialized = 1;
1914e0c1b49fSNick Terrell
1915e0c1b49fSNick Terrell return 0;
1916e0c1b49fSNick Terrell }
1917e0c1b49fSNick Terrell }
1918e0c1b49fSNick Terrell
1919e0c1b49fSNick Terrell /* ZSTD_invalidateRepCodes() :
1920e0c1b49fSNick Terrell * ensures next compression will not use repcodes from previous block.
1921e0c1b49fSNick Terrell * Note : only works with regular variant;
1922e0c1b49fSNick Terrell * do not use with extDict variant ! */
ZSTD_invalidateRepCodes(ZSTD_CCtx * cctx)1923e0c1b49fSNick Terrell void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
1924e0c1b49fSNick Terrell int i;
1925e0c1b49fSNick Terrell for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
1926e0c1b49fSNick Terrell assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
1927e0c1b49fSNick Terrell }
1928e0c1b49fSNick Terrell
1929e0c1b49fSNick Terrell /* These are the approximate sizes for each strategy past which copying the
1930e0c1b49fSNick Terrell * dictionary tables into the working context is faster than using them
1931e0c1b49fSNick Terrell * in-place.
1932e0c1b49fSNick Terrell */
1933e0c1b49fSNick Terrell static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
1934e0c1b49fSNick Terrell 8 KB, /* unused */
1935e0c1b49fSNick Terrell 8 KB, /* ZSTD_fast */
1936e0c1b49fSNick Terrell 16 KB, /* ZSTD_dfast */
1937e0c1b49fSNick Terrell 32 KB, /* ZSTD_greedy */
1938e0c1b49fSNick Terrell 32 KB, /* ZSTD_lazy */
1939e0c1b49fSNick Terrell 32 KB, /* ZSTD_lazy2 */
1940e0c1b49fSNick Terrell 32 KB, /* ZSTD_btlazy2 */
1941e0c1b49fSNick Terrell 32 KB, /* ZSTD_btopt */
1942e0c1b49fSNick Terrell 8 KB, /* ZSTD_btultra */
1943e0c1b49fSNick Terrell 8 KB /* ZSTD_btultra2 */
1944e0c1b49fSNick Terrell };
1945e0c1b49fSNick Terrell
/* ZSTD_shouldAttachDict() :
 * Decides whether the CDict should be attached (referenced in place).
 * @return non-zero when :
 *   - the cdict uses dedicated dict search, or
 *   - attaching is favorable (pledgedSrcSize is at most the per-strategy
 *     cutoff, or unknown, or attachDictPref is ZSTD_dictForceAttach)
 *     AND attachDictPref is not ZSTD_dictForceCopy
 *     AND forceWindow is not set.
 */
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
    return dedicatedDictSearch
        || ( ( pledgedSrcSize <= cutoff
            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
            || params->attachDictPref == ZSTD_dictForceAttach )
          && params->attachDictPref != ZSTD_dictForceCopy
          && !params->forceWindow ); /* dictMatchState isn't correctly
                                      * handled in _enforceMaxDist */
}
1960e0c1b49fSNick Terrell
1961e0c1b49fSNick Terrell static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict,ZSTD_CCtx_params params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)1962e0c1b49fSNick Terrell ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
1963e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
1964e0c1b49fSNick Terrell ZSTD_CCtx_params params,
1965e0c1b49fSNick Terrell U64 pledgedSrcSize,
1966e0c1b49fSNick Terrell ZSTD_buffered_policy_e zbuff)
1967e0c1b49fSNick Terrell {
1968*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
1969*2aa14b1aSNick Terrell (unsigned long long)pledgedSrcSize);
1970e0c1b49fSNick Terrell {
1971e0c1b49fSNick Terrell ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
1972e0c1b49fSNick Terrell unsigned const windowLog = params.cParams.windowLog;
1973e0c1b49fSNick Terrell assert(windowLog != 0);
1974e0c1b49fSNick Terrell /* Resize working context table params for input only, since the dict
1975e0c1b49fSNick Terrell * has its own tables. */
1976e0c1b49fSNick Terrell /* pledgedSrcSize == 0 means 0! */
1977e0c1b49fSNick Terrell
1978e0c1b49fSNick Terrell if (cdict->matchState.dedicatedDictSearch) {
1979e0c1b49fSNick Terrell ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
1980e0c1b49fSNick Terrell }
1981e0c1b49fSNick Terrell
1982e0c1b49fSNick Terrell params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
1983e0c1b49fSNick Terrell cdict->dictContentSize, ZSTD_cpm_attachDict);
1984e0c1b49fSNick Terrell params.cParams.windowLog = windowLog;
1985*2aa14b1aSNick Terrell params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
1986*2aa14b1aSNick Terrell FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize,
1987*2aa14b1aSNick Terrell /* loadedDictSize */ 0,
1988e0c1b49fSNick Terrell ZSTDcrp_makeClean, zbuff), "");
1989e0c1b49fSNick Terrell assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
1990e0c1b49fSNick Terrell }
1991e0c1b49fSNick Terrell
1992e0c1b49fSNick Terrell { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
1993e0c1b49fSNick Terrell - cdict->matchState.window.base);
1994e0c1b49fSNick Terrell const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
1995e0c1b49fSNick Terrell if (cdictLen == 0) {
1996e0c1b49fSNick Terrell /* don't even attach dictionaries with no contents */
1997e0c1b49fSNick Terrell DEBUGLOG(4, "skipping attaching empty dictionary");
1998e0c1b49fSNick Terrell } else {
1999e0c1b49fSNick Terrell DEBUGLOG(4, "attaching dictionary into context");
2000e0c1b49fSNick Terrell cctx->blockState.matchState.dictMatchState = &cdict->matchState;
2001e0c1b49fSNick Terrell
2002e0c1b49fSNick Terrell /* prep working match state so dict matches never have negative indices
2003e0c1b49fSNick Terrell * when they are translated to the working context's index space. */
2004e0c1b49fSNick Terrell if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
2005e0c1b49fSNick Terrell cctx->blockState.matchState.window.nextSrc =
2006e0c1b49fSNick Terrell cctx->blockState.matchState.window.base + cdictEnd;
2007e0c1b49fSNick Terrell ZSTD_window_clear(&cctx->blockState.matchState.window);
2008e0c1b49fSNick Terrell }
2009e0c1b49fSNick Terrell /* loadedDictEnd is expressed within the referential of the active context */
2010e0c1b49fSNick Terrell cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
2011e0c1b49fSNick Terrell } }
2012e0c1b49fSNick Terrell
2013e0c1b49fSNick Terrell cctx->dictID = cdict->dictID;
2014e0c1b49fSNick Terrell cctx->dictContentSize = cdict->dictContentSize;
2015e0c1b49fSNick Terrell
2016e0c1b49fSNick Terrell /* copy block state */
2017e0c1b49fSNick Terrell ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
2018e0c1b49fSNick Terrell
2019e0c1b49fSNick Terrell return 0;
2020e0c1b49fSNick Terrell }
2021e0c1b49fSNick Terrell
/*! ZSTD_resetCCtx_byCopyingCDict() :
 *  Resets `cctx`, then duplicates the cdict's match-finder tables, window
 *  description, dictionary identifiers and previous-block entropy state into it.
 *  `params` is received by value : its compression parameters are replaced by
 *  the cdict's ones, except windowLog which keeps the requested value.
 *  The cdict must not be a dedicated-dict-search one (asserted).
 * @return : 0, or an error code forwarded from ZSTD_resetCCtx_internal() */
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
    const ZSTD_matchState_t* const srcMs = &cdict->matchState;
    ZSTD_matchState_t* const dstMs = &cctx->blockState.matchState;

    assert(!cdict->matchState.dedicatedDictSearch);
    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);

    /* Table-related parameters come from the cdict; only windowLog is the caller's. */
    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
    assert(params.useRowMatchFinder != ZSTD_ps_auto);

    /* duplicate the match-finder tables */
    {   size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
        size_t chainSize = 0;
        if (ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)) {
            chainSize = (size_t)1 << cdict_cParams->chainLog;
        }

        ZSTD_memcpy(dstMs->hashTable, srcMs->hashTable, hSize * sizeof(U32));

        /* Skip the chain table when the cctx's applied parameters would not use one */
        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
            ZSTD_memcpy(dstMs->chainTable, srcMs->chainTable, chainSize * sizeof(U32));
        }

        /* The tag table only exists when the row match finder is in use */
        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
            ZSTD_memcpy(dstMs->tagTable, srcMs->tagTable, hSize * sizeof(U16));
        }
    }

    /* The cdict never fills hashTable3 : clear the cctx's one instead of copying */
    {   int const h3log = dstMs->hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        ZSTD_memset(dstMs->hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* dictionary offsets */
    dstMs->window        = srcMs->window;
    dstMs->nextToUpdate  = srcMs->nextToUpdate;
    dstMs->loadedDictEnd = srcMs->loadedDictEnd;

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
2100e0c1b49fSNick Terrell
2101e0c1b49fSNick Terrell /* We have a choice between copying the dictionary context into the working
2102e0c1b49fSNick Terrell * context, or referencing the dictionary context from the working context
2103e0c1b49fSNick Terrell * in-place. We decide here which strategy to use. */
ZSTD_resetCCtx_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)2104e0c1b49fSNick Terrell static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
2105e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
2106e0c1b49fSNick Terrell const ZSTD_CCtx_params* params,
2107e0c1b49fSNick Terrell U64 pledgedSrcSize,
2108e0c1b49fSNick Terrell ZSTD_buffered_policy_e zbuff)
2109e0c1b49fSNick Terrell {
2110e0c1b49fSNick Terrell
2111e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
2112e0c1b49fSNick Terrell (unsigned)pledgedSrcSize);
2113e0c1b49fSNick Terrell
2114e0c1b49fSNick Terrell if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
2115e0c1b49fSNick Terrell return ZSTD_resetCCtx_byAttachingCDict(
2116e0c1b49fSNick Terrell cctx, cdict, *params, pledgedSrcSize, zbuff);
2117e0c1b49fSNick Terrell } else {
2118e0c1b49fSNick Terrell return ZSTD_resetCCtx_byCopyingCDict(
2119e0c1b49fSNick Terrell cctx, cdict, *params, pledgedSrcSize, zbuff);
2120e0c1b49fSNick Terrell }
2121e0c1b49fSNick Terrell }
2122e0c1b49fSNick Terrell
2123e0c1b49fSNick Terrell /*! ZSTD_copyCCtx_internal() :
2124e0c1b49fSNick Terrell * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
2125e0c1b49fSNick Terrell * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
2126e0c1b49fSNick Terrell * The "context", in this case, refers to the hash and chain tables,
2127e0c1b49fSNick Terrell * entropy tables, and dictionary references.
2128e0c1b49fSNick Terrell * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
2129e0c1b49fSNick Terrell * @return : 0, or an error code */
ZSTD_copyCCtx_internal(ZSTD_CCtx * dstCCtx,const ZSTD_CCtx * srcCCtx,ZSTD_frameParameters fParams,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)2130e0c1b49fSNick Terrell static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
2131e0c1b49fSNick Terrell const ZSTD_CCtx* srcCCtx,
2132e0c1b49fSNick Terrell ZSTD_frameParameters fParams,
2133e0c1b49fSNick Terrell U64 pledgedSrcSize,
2134e0c1b49fSNick Terrell ZSTD_buffered_policy_e zbuff)
2135e0c1b49fSNick Terrell {
2136e0c1b49fSNick Terrell RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
2137e0c1b49fSNick Terrell "Can't copy a ctx that's not in init stage.");
2138*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_copyCCtx_internal");
2139e0c1b49fSNick Terrell ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
2140e0c1b49fSNick Terrell { ZSTD_CCtx_params params = dstCCtx->requestedParams;
2141e0c1b49fSNick Terrell /* Copy only compression parameters related to tables. */
2142e0c1b49fSNick Terrell params.cParams = srcCCtx->appliedParams.cParams;
2143*2aa14b1aSNick Terrell assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
2144*2aa14b1aSNick Terrell assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
2145*2aa14b1aSNick Terrell assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
2146*2aa14b1aSNick Terrell params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
2147*2aa14b1aSNick Terrell params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
2148*2aa14b1aSNick Terrell params.ldmParams = srcCCtx->appliedParams.ldmParams;
2149e0c1b49fSNick Terrell params.fParams = fParams;
2150*2aa14b1aSNick Terrell ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize,
2151*2aa14b1aSNick Terrell /* loadedDictSize */ 0,
2152e0c1b49fSNick Terrell ZSTDcrp_leaveDirty, zbuff);
2153e0c1b49fSNick Terrell assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
2154e0c1b49fSNick Terrell assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
2155e0c1b49fSNick Terrell assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
2156e0c1b49fSNick Terrell assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
2157e0c1b49fSNick Terrell assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
2158e0c1b49fSNick Terrell }
2159e0c1b49fSNick Terrell
2160e0c1b49fSNick Terrell ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
2161e0c1b49fSNick Terrell
2162e0c1b49fSNick Terrell /* copy tables */
2163*2aa14b1aSNick Terrell { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
2164*2aa14b1aSNick Terrell srcCCtx->appliedParams.useRowMatchFinder,
2165*2aa14b1aSNick Terrell 0 /* forDDSDict */)
2166*2aa14b1aSNick Terrell ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
2167*2aa14b1aSNick Terrell : 0;
2168e0c1b49fSNick Terrell size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
2169e0c1b49fSNick Terrell int const h3log = srcCCtx->blockState.matchState.hashLog3;
2170e0c1b49fSNick Terrell size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
2171e0c1b49fSNick Terrell
2172e0c1b49fSNick Terrell ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
2173e0c1b49fSNick Terrell srcCCtx->blockState.matchState.hashTable,
2174e0c1b49fSNick Terrell hSize * sizeof(U32));
2175e0c1b49fSNick Terrell ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
2176e0c1b49fSNick Terrell srcCCtx->blockState.matchState.chainTable,
2177e0c1b49fSNick Terrell chainSize * sizeof(U32));
2178e0c1b49fSNick Terrell ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
2179e0c1b49fSNick Terrell srcCCtx->blockState.matchState.hashTable3,
2180e0c1b49fSNick Terrell h3Size * sizeof(U32));
2181e0c1b49fSNick Terrell }
2182e0c1b49fSNick Terrell
2183e0c1b49fSNick Terrell ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
2184e0c1b49fSNick Terrell
2185e0c1b49fSNick Terrell /* copy dictionary offsets */
2186e0c1b49fSNick Terrell {
2187e0c1b49fSNick Terrell const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
2188e0c1b49fSNick Terrell ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
2189e0c1b49fSNick Terrell dstMatchState->window = srcMatchState->window;
2190e0c1b49fSNick Terrell dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
2191e0c1b49fSNick Terrell dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
2192e0c1b49fSNick Terrell }
2193e0c1b49fSNick Terrell dstCCtx->dictID = srcCCtx->dictID;
2194e0c1b49fSNick Terrell dstCCtx->dictContentSize = srcCCtx->dictContentSize;
2195e0c1b49fSNick Terrell
2196e0c1b49fSNick Terrell /* copy block state */
2197e0c1b49fSNick Terrell ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
2198e0c1b49fSNick Terrell
2199e0c1b49fSNick Terrell return 0;
2200e0c1b49fSNick Terrell }
2201e0c1b49fSNick Terrell
2202e0c1b49fSNick Terrell /*! ZSTD_copyCCtx() :
2203e0c1b49fSNick Terrell * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
2204e0c1b49fSNick Terrell * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
2205e0c1b49fSNick Terrell * pledgedSrcSize==0 means "unknown".
2206e0c1b49fSNick Terrell * @return : 0, or an error code */
ZSTD_copyCCtx(ZSTD_CCtx * dstCCtx,const ZSTD_CCtx * srcCCtx,unsigned long long pledgedSrcSize)2207e0c1b49fSNick Terrell size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
2208e0c1b49fSNick Terrell {
2209e0c1b49fSNick Terrell ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
2210e0c1b49fSNick Terrell ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
2211e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
2212e0c1b49fSNick Terrell if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
2213e0c1b49fSNick Terrell fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
2214e0c1b49fSNick Terrell
2215e0c1b49fSNick Terrell return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
2216e0c1b49fSNick Terrell fParams, pledgedSrcSize,
2217e0c1b49fSNick Terrell zbuff);
2218e0c1b49fSNick Terrell }
2219e0c1b49fSNick Terrell
2220e0c1b49fSNick Terrell
2221e0c1b49fSNick Terrell #define ZSTD_ROWSIZE 16
2222e0c1b49fSNick Terrell /*! ZSTD_reduceTable() :
2223e0c1b49fSNick Terrell * reduce table indexes by `reducerValue`, or squash to zero.
2224e0c1b49fSNick Terrell * PreserveMark preserves "unsorted mark" for btlazy2 strategy.
2225e0c1b49fSNick Terrell * It must be set to a clear 0/1 value, to remove branch during inlining.
2226e0c1b49fSNick Terrell * Presume table size is a multiple of ZSTD_ROWSIZE
2227e0c1b49fSNick Terrell * to help auto-vectorization */
2228e0c1b49fSNick Terrell FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal(U32 * const table,U32 const size,U32 const reducerValue,int const preserveMark)2229e0c1b49fSNick Terrell ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
2230e0c1b49fSNick Terrell {
2231e0c1b49fSNick Terrell int const nbRows = (int)size / ZSTD_ROWSIZE;
2232e0c1b49fSNick Terrell int cellNb = 0;
2233e0c1b49fSNick Terrell int rowNb;
2234*2aa14b1aSNick Terrell /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
2235*2aa14b1aSNick Terrell U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
2236e0c1b49fSNick Terrell assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
2237e0c1b49fSNick Terrell assert(size < (1U<<31)); /* can be casted to int */
2238e0c1b49fSNick Terrell
2239e0c1b49fSNick Terrell
2240e0c1b49fSNick Terrell for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
2241e0c1b49fSNick Terrell int column;
2242e0c1b49fSNick Terrell for (column=0; column<ZSTD_ROWSIZE; column++) {
2243*2aa14b1aSNick Terrell U32 newVal;
2244*2aa14b1aSNick Terrell if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
2245*2aa14b1aSNick Terrell /* This write is pointless, but is required(?) for the compiler
2246*2aa14b1aSNick Terrell * to auto-vectorize the loop. */
2247*2aa14b1aSNick Terrell newVal = ZSTD_DUBT_UNSORTED_MARK;
2248*2aa14b1aSNick Terrell } else if (table[cellNb] < reducerThreshold) {
2249*2aa14b1aSNick Terrell newVal = 0;
2250*2aa14b1aSNick Terrell } else {
2251*2aa14b1aSNick Terrell newVal = table[cellNb] - reducerValue;
2252e0c1b49fSNick Terrell }
2253*2aa14b1aSNick Terrell table[cellNb] = newVal;
2254e0c1b49fSNick Terrell cellNb++;
2255e0c1b49fSNick Terrell } }
2256e0c1b49fSNick Terrell }
2257e0c1b49fSNick Terrell
/* ZSTD_reduceTable() :
 * Plain index reduction : no cell value receives special treatment. */
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0 /* preserveMark */);
}
2262e0c1b49fSNick Terrell
/* ZSTD_reduceTable_btlazy2() :
 * Index reduction variant which keeps ZSTD_DUBT_UNSORTED_MARK cells intact. */
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1 /* preserveMark */);
}
2267e0c1b49fSNick Terrell
2268e0c1b49fSNick Terrell /*! ZSTD_reduceIndex() :
2269e0c1b49fSNick Terrell * rescale all indexes to avoid future overflow (indexes are U32) */
ZSTD_reduceIndex(ZSTD_matchState_t * ms,ZSTD_CCtx_params const * params,const U32 reducerValue)2270e0c1b49fSNick Terrell static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
2271e0c1b49fSNick Terrell {
2272e0c1b49fSNick Terrell { U32 const hSize = (U32)1 << params->cParams.hashLog;
2273e0c1b49fSNick Terrell ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
2274e0c1b49fSNick Terrell }
2275e0c1b49fSNick Terrell
2276*2aa14b1aSNick Terrell if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
2277e0c1b49fSNick Terrell U32 const chainSize = (U32)1 << params->cParams.chainLog;
2278e0c1b49fSNick Terrell if (params->cParams.strategy == ZSTD_btlazy2)
2279e0c1b49fSNick Terrell ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
2280e0c1b49fSNick Terrell else
2281e0c1b49fSNick Terrell ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
2282e0c1b49fSNick Terrell }
2283e0c1b49fSNick Terrell
2284e0c1b49fSNick Terrell if (ms->hashLog3) {
2285e0c1b49fSNick Terrell U32 const h3Size = (U32)1 << ms->hashLog3;
2286e0c1b49fSNick Terrell ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
2287e0c1b49fSNick Terrell }
2288e0c1b49fSNick Terrell }
2289e0c1b49fSNick Terrell
2290e0c1b49fSNick Terrell
/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */
2296e0c1b49fSNick Terrell
ZSTD_seqToCodes(const seqStore_t * seqStorePtr)2297e0c1b49fSNick Terrell void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
2298e0c1b49fSNick Terrell {
2299e0c1b49fSNick Terrell const seqDef* const sequences = seqStorePtr->sequencesStart;
2300e0c1b49fSNick Terrell BYTE* const llCodeTable = seqStorePtr->llCode;
2301e0c1b49fSNick Terrell BYTE* const ofCodeTable = seqStorePtr->ofCode;
2302e0c1b49fSNick Terrell BYTE* const mlCodeTable = seqStorePtr->mlCode;
2303e0c1b49fSNick Terrell U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
2304e0c1b49fSNick Terrell U32 u;
2305e0c1b49fSNick Terrell assert(nbSeq <= seqStorePtr->maxNbSeq);
2306e0c1b49fSNick Terrell for (u=0; u<nbSeq; u++) {
2307e0c1b49fSNick Terrell U32 const llv = sequences[u].litLength;
2308*2aa14b1aSNick Terrell U32 const mlv = sequences[u].mlBase;
2309e0c1b49fSNick Terrell llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
2310*2aa14b1aSNick Terrell ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
2311e0c1b49fSNick Terrell mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
2312e0c1b49fSNick Terrell }
2313*2aa14b1aSNick Terrell if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
2314e0c1b49fSNick Terrell llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
2315*2aa14b1aSNick Terrell if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
2316e0c1b49fSNick Terrell mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
2317e0c1b49fSNick Terrell }
2318e0c1b49fSNick Terrell
2319e0c1b49fSNick Terrell /* ZSTD_useTargetCBlockSize():
2320e0c1b49fSNick Terrell * Returns if target compressed block size param is being used.
2321e0c1b49fSNick Terrell * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
2322e0c1b49fSNick Terrell * Returns 1 if true, 0 otherwise. */
ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params * cctxParams)2323e0c1b49fSNick Terrell static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
2324e0c1b49fSNick Terrell {
2325e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
2326e0c1b49fSNick Terrell return (cctxParams->targetCBlockSize != 0);
2327e0c1b49fSNick Terrell }
2328e0c1b49fSNick Terrell
2329*2aa14b1aSNick Terrell /* ZSTD_blockSplitterEnabled():
2330*2aa14b1aSNick Terrell * Returns if block splitting param is being used
2331*2aa14b1aSNick Terrell * If used, compression will do best effort to split a block in order to improve compression ratio.
2332*2aa14b1aSNick Terrell * At the time this function is called, the parameter must be finalized.
2333*2aa14b1aSNick Terrell * Returns 1 if true, 0 otherwise. */
ZSTD_blockSplitterEnabled(ZSTD_CCtx_params * cctxParams)2334*2aa14b1aSNick Terrell static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
2335*2aa14b1aSNick Terrell {
2336*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
2337*2aa14b1aSNick Terrell assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
2338*2aa14b1aSNick Terrell return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
2339*2aa14b1aSNick Terrell }
2340*2aa14b1aSNick Terrell
2341*2aa14b1aSNick Terrell /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
2342*2aa14b1aSNick Terrell * and size of the sequences statistics
2343*2aa14b1aSNick Terrell */
2344*2aa14b1aSNick Terrell typedef struct {
2345*2aa14b1aSNick Terrell U32 LLtype;
2346*2aa14b1aSNick Terrell U32 Offtype;
2347*2aa14b1aSNick Terrell U32 MLtype;
2348*2aa14b1aSNick Terrell size_t size;
2349*2aa14b1aSNick Terrell size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
2350*2aa14b1aSNick Terrell } ZSTD_symbolEncodingTypeStats_t;
2351*2aa14b1aSNick Terrell
2352*2aa14b1aSNick Terrell /* ZSTD_buildSequencesStatistics():
2353*2aa14b1aSNick Terrell * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
2354*2aa14b1aSNick Terrell * Modifies `nextEntropy` to have the appropriate values as a side effect.
2355*2aa14b1aSNick Terrell * nbSeq must be greater than 0.
2356*2aa14b1aSNick Terrell *
2357*2aa14b1aSNick Terrell * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
2358*2aa14b1aSNick Terrell */
2359*2aa14b1aSNick Terrell static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildSequencesStatistics(seqStore_t * seqStorePtr,size_t nbSeq,const ZSTD_fseCTables_t * prevEntropy,ZSTD_fseCTables_t * nextEntropy,BYTE * dst,const BYTE * const dstEnd,ZSTD_strategy strategy,unsigned * countWorkspace,void * entropyWorkspace,size_t entropyWkspSize)2360*2aa14b1aSNick Terrell ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
2361*2aa14b1aSNick Terrell const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
2362*2aa14b1aSNick Terrell BYTE* dst, const BYTE* const dstEnd,
2363*2aa14b1aSNick Terrell ZSTD_strategy strategy, unsigned* countWorkspace,
2364*2aa14b1aSNick Terrell void* entropyWorkspace, size_t entropyWkspSize) {
2365*2aa14b1aSNick Terrell BYTE* const ostart = dst;
2366*2aa14b1aSNick Terrell const BYTE* const oend = dstEnd;
2367*2aa14b1aSNick Terrell BYTE* op = ostart;
2368*2aa14b1aSNick Terrell FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
2369*2aa14b1aSNick Terrell FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
2370*2aa14b1aSNick Terrell FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
2371*2aa14b1aSNick Terrell const BYTE* const ofCodeTable = seqStorePtr->ofCode;
2372*2aa14b1aSNick Terrell const BYTE* const llCodeTable = seqStorePtr->llCode;
2373*2aa14b1aSNick Terrell const BYTE* const mlCodeTable = seqStorePtr->mlCode;
2374*2aa14b1aSNick Terrell ZSTD_symbolEncodingTypeStats_t stats;
2375*2aa14b1aSNick Terrell
2376*2aa14b1aSNick Terrell stats.lastCountSize = 0;
2377*2aa14b1aSNick Terrell /* convert length/distances into codes */
2378*2aa14b1aSNick Terrell ZSTD_seqToCodes(seqStorePtr);
2379*2aa14b1aSNick Terrell assert(op <= oend);
2380*2aa14b1aSNick Terrell assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
2381*2aa14b1aSNick Terrell /* build CTable for Literal Lengths */
2382*2aa14b1aSNick Terrell { unsigned max = MaxLL;
2383*2aa14b1aSNick Terrell size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2384*2aa14b1aSNick Terrell DEBUGLOG(5, "Building LL table");
2385*2aa14b1aSNick Terrell nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
2386*2aa14b1aSNick Terrell stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
2387*2aa14b1aSNick Terrell countWorkspace, max, mostFrequent, nbSeq,
2388*2aa14b1aSNick Terrell LLFSELog, prevEntropy->litlengthCTable,
2389*2aa14b1aSNick Terrell LL_defaultNorm, LL_defaultNormLog,
2390*2aa14b1aSNick Terrell ZSTD_defaultAllowed, strategy);
2391*2aa14b1aSNick Terrell assert(set_basic < set_compressed && set_rle < set_compressed);
2392*2aa14b1aSNick Terrell assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2393*2aa14b1aSNick Terrell { size_t const countSize = ZSTD_buildCTable(
2394*2aa14b1aSNick Terrell op, (size_t)(oend - op),
2395*2aa14b1aSNick Terrell CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
2396*2aa14b1aSNick Terrell countWorkspace, max, llCodeTable, nbSeq,
2397*2aa14b1aSNick Terrell LL_defaultNorm, LL_defaultNormLog, MaxLL,
2398*2aa14b1aSNick Terrell prevEntropy->litlengthCTable,
2399*2aa14b1aSNick Terrell sizeof(prevEntropy->litlengthCTable),
2400*2aa14b1aSNick Terrell entropyWorkspace, entropyWkspSize);
2401*2aa14b1aSNick Terrell if (ZSTD_isError(countSize)) {
2402*2aa14b1aSNick Terrell DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
2403*2aa14b1aSNick Terrell stats.size = countSize;
2404*2aa14b1aSNick Terrell return stats;
2405*2aa14b1aSNick Terrell }
2406*2aa14b1aSNick Terrell if (stats.LLtype == set_compressed)
2407*2aa14b1aSNick Terrell stats.lastCountSize = countSize;
2408*2aa14b1aSNick Terrell op += countSize;
2409*2aa14b1aSNick Terrell assert(op <= oend);
2410*2aa14b1aSNick Terrell } }
2411*2aa14b1aSNick Terrell /* build CTable for Offsets */
2412*2aa14b1aSNick Terrell { unsigned max = MaxOff;
2413*2aa14b1aSNick Terrell size_t const mostFrequent = HIST_countFast_wksp(
2414*2aa14b1aSNick Terrell countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2415*2aa14b1aSNick Terrell /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
2416*2aa14b1aSNick Terrell ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
2417*2aa14b1aSNick Terrell DEBUGLOG(5, "Building OF table");
2418*2aa14b1aSNick Terrell nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
2419*2aa14b1aSNick Terrell stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
2420*2aa14b1aSNick Terrell countWorkspace, max, mostFrequent, nbSeq,
2421*2aa14b1aSNick Terrell OffFSELog, prevEntropy->offcodeCTable,
2422*2aa14b1aSNick Terrell OF_defaultNorm, OF_defaultNormLog,
2423*2aa14b1aSNick Terrell defaultPolicy, strategy);
2424*2aa14b1aSNick Terrell assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2425*2aa14b1aSNick Terrell { size_t const countSize = ZSTD_buildCTable(
2426*2aa14b1aSNick Terrell op, (size_t)(oend - op),
2427*2aa14b1aSNick Terrell CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
2428*2aa14b1aSNick Terrell countWorkspace, max, ofCodeTable, nbSeq,
2429*2aa14b1aSNick Terrell OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2430*2aa14b1aSNick Terrell prevEntropy->offcodeCTable,
2431*2aa14b1aSNick Terrell sizeof(prevEntropy->offcodeCTable),
2432*2aa14b1aSNick Terrell entropyWorkspace, entropyWkspSize);
2433*2aa14b1aSNick Terrell if (ZSTD_isError(countSize)) {
2434*2aa14b1aSNick Terrell DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
2435*2aa14b1aSNick Terrell stats.size = countSize;
2436*2aa14b1aSNick Terrell return stats;
2437*2aa14b1aSNick Terrell }
2438*2aa14b1aSNick Terrell if (stats.Offtype == set_compressed)
2439*2aa14b1aSNick Terrell stats.lastCountSize = countSize;
2440*2aa14b1aSNick Terrell op += countSize;
2441*2aa14b1aSNick Terrell assert(op <= oend);
2442*2aa14b1aSNick Terrell } }
2443*2aa14b1aSNick Terrell /* build CTable for MatchLengths */
2444*2aa14b1aSNick Terrell { unsigned max = MaxML;
2445*2aa14b1aSNick Terrell size_t const mostFrequent = HIST_countFast_wksp(
2446*2aa14b1aSNick Terrell countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
2447*2aa14b1aSNick Terrell DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
2448*2aa14b1aSNick Terrell nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
2449*2aa14b1aSNick Terrell stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
2450*2aa14b1aSNick Terrell countWorkspace, max, mostFrequent, nbSeq,
2451*2aa14b1aSNick Terrell MLFSELog, prevEntropy->matchlengthCTable,
2452*2aa14b1aSNick Terrell ML_defaultNorm, ML_defaultNormLog,
2453*2aa14b1aSNick Terrell ZSTD_defaultAllowed, strategy);
2454*2aa14b1aSNick Terrell assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
2455*2aa14b1aSNick Terrell { size_t const countSize = ZSTD_buildCTable(
2456*2aa14b1aSNick Terrell op, (size_t)(oend - op),
2457*2aa14b1aSNick Terrell CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
2458*2aa14b1aSNick Terrell countWorkspace, max, mlCodeTable, nbSeq,
2459*2aa14b1aSNick Terrell ML_defaultNorm, ML_defaultNormLog, MaxML,
2460*2aa14b1aSNick Terrell prevEntropy->matchlengthCTable,
2461*2aa14b1aSNick Terrell sizeof(prevEntropy->matchlengthCTable),
2462*2aa14b1aSNick Terrell entropyWorkspace, entropyWkspSize);
2463*2aa14b1aSNick Terrell if (ZSTD_isError(countSize)) {
2464*2aa14b1aSNick Terrell DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
2465*2aa14b1aSNick Terrell stats.size = countSize;
2466*2aa14b1aSNick Terrell return stats;
2467*2aa14b1aSNick Terrell }
2468*2aa14b1aSNick Terrell if (stats.MLtype == set_compressed)
2469*2aa14b1aSNick Terrell stats.lastCountSize = countSize;
2470*2aa14b1aSNick Terrell op += countSize;
2471*2aa14b1aSNick Terrell assert(op <= oend);
2472*2aa14b1aSNick Terrell } }
2473*2aa14b1aSNick Terrell stats.size = (size_t)(op-ostart);
2474*2aa14b1aSNick Terrell return stats;
2475*2aa14b1aSNick Terrell }
2476*2aa14b1aSNick Terrell
2477*2aa14b1aSNick Terrell /* ZSTD_entropyCompressSeqStore_internal():
2478*2aa14b1aSNick Terrell * compresses both literals and sequences
2479*2aa14b1aSNick Terrell * Returns compressed size of block, or a zstd error.
2480*2aa14b1aSNick Terrell */
2481*2aa14b1aSNick Terrell #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
2482e0c1b49fSNick Terrell MEM_STATIC size_t
ZSTD_entropyCompressSeqStore_internal(seqStore_t * seqStorePtr,const ZSTD_entropyCTables_t * prevEntropy,ZSTD_entropyCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,void * dst,size_t dstCapacity,void * entropyWorkspace,size_t entropyWkspSize,const int bmi2)2483*2aa14b1aSNick Terrell ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
2484e0c1b49fSNick Terrell const ZSTD_entropyCTables_t* prevEntropy,
2485e0c1b49fSNick Terrell ZSTD_entropyCTables_t* nextEntropy,
2486e0c1b49fSNick Terrell const ZSTD_CCtx_params* cctxParams,
2487e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
2488e0c1b49fSNick Terrell void* entropyWorkspace, size_t entropyWkspSize,
2489e0c1b49fSNick Terrell const int bmi2)
2490e0c1b49fSNick Terrell {
2491e0c1b49fSNick Terrell const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2492e0c1b49fSNick Terrell ZSTD_strategy const strategy = cctxParams->cParams.strategy;
2493e0c1b49fSNick Terrell unsigned* count = (unsigned*)entropyWorkspace;
2494e0c1b49fSNick Terrell FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
2495e0c1b49fSNick Terrell FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
2496e0c1b49fSNick Terrell FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
2497e0c1b49fSNick Terrell const seqDef* const sequences = seqStorePtr->sequencesStart;
2498*2aa14b1aSNick Terrell const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
2499e0c1b49fSNick Terrell const BYTE* const ofCodeTable = seqStorePtr->ofCode;
2500e0c1b49fSNick Terrell const BYTE* const llCodeTable = seqStorePtr->llCode;
2501e0c1b49fSNick Terrell const BYTE* const mlCodeTable = seqStorePtr->mlCode;
2502e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*)dst;
2503e0c1b49fSNick Terrell BYTE* const oend = ostart + dstCapacity;
2504e0c1b49fSNick Terrell BYTE* op = ostart;
2505*2aa14b1aSNick Terrell size_t lastCountSize;
2506e0c1b49fSNick Terrell
2507e0c1b49fSNick Terrell entropyWorkspace = count + (MaxSeq + 1);
2508e0c1b49fSNick Terrell entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
2509e0c1b49fSNick Terrell
2510*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
2511e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2512e0c1b49fSNick Terrell assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
2513e0c1b49fSNick Terrell
2514e0c1b49fSNick Terrell /* Compress literals */
2515e0c1b49fSNick Terrell { const BYTE* const literals = seqStorePtr->litStart;
2516*2aa14b1aSNick Terrell size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
2517*2aa14b1aSNick Terrell size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
2518*2aa14b1aSNick Terrell /* Base suspicion of uncompressibility on ratio of literals to sequences */
2519*2aa14b1aSNick Terrell unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
2520e0c1b49fSNick Terrell size_t const litSize = (size_t)(seqStorePtr->lit - literals);
2521e0c1b49fSNick Terrell size_t const cSize = ZSTD_compressLiterals(
2522e0c1b49fSNick Terrell &prevEntropy->huf, &nextEntropy->huf,
2523e0c1b49fSNick Terrell cctxParams->cParams.strategy,
2524*2aa14b1aSNick Terrell ZSTD_literalsCompressionIsDisabled(cctxParams),
2525e0c1b49fSNick Terrell op, dstCapacity,
2526e0c1b49fSNick Terrell literals, litSize,
2527e0c1b49fSNick Terrell entropyWorkspace, entropyWkspSize,
2528*2aa14b1aSNick Terrell bmi2, suspectUncompressible);
2529e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
2530e0c1b49fSNick Terrell assert(cSize <= dstCapacity);
2531e0c1b49fSNick Terrell op += cSize;
2532e0c1b49fSNick Terrell }
2533e0c1b49fSNick Terrell
2534e0c1b49fSNick Terrell /* Sequences Header */
2535e0c1b49fSNick Terrell RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
2536e0c1b49fSNick Terrell dstSize_tooSmall, "Can't fit seq hdr in output buf!");
2537e0c1b49fSNick Terrell if (nbSeq < 128) {
2538e0c1b49fSNick Terrell *op++ = (BYTE)nbSeq;
2539e0c1b49fSNick Terrell } else if (nbSeq < LONGNBSEQ) {
2540e0c1b49fSNick Terrell op[0] = (BYTE)((nbSeq>>8) + 0x80);
2541e0c1b49fSNick Terrell op[1] = (BYTE)nbSeq;
2542e0c1b49fSNick Terrell op+=2;
2543e0c1b49fSNick Terrell } else {
2544e0c1b49fSNick Terrell op[0]=0xFF;
2545e0c1b49fSNick Terrell MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
2546e0c1b49fSNick Terrell op+=3;
2547e0c1b49fSNick Terrell }
2548e0c1b49fSNick Terrell assert(op <= oend);
2549e0c1b49fSNick Terrell if (nbSeq==0) {
2550e0c1b49fSNick Terrell /* Copy the old tables over as if we repeated them */
2551e0c1b49fSNick Terrell ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
2552e0c1b49fSNick Terrell return (size_t)(op - ostart);
2553e0c1b49fSNick Terrell }
2554*2aa14b1aSNick Terrell {
2555*2aa14b1aSNick Terrell ZSTD_symbolEncodingTypeStats_t stats;
2556*2aa14b1aSNick Terrell BYTE* seqHead = op++;
2557*2aa14b1aSNick Terrell /* build stats for sequences */
2558*2aa14b1aSNick Terrell stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
2559*2aa14b1aSNick Terrell &prevEntropy->fse, &nextEntropy->fse,
2560*2aa14b1aSNick Terrell op, oend,
2561*2aa14b1aSNick Terrell strategy, count,
2562e0c1b49fSNick Terrell entropyWorkspace, entropyWkspSize);
2563*2aa14b1aSNick Terrell FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
2564*2aa14b1aSNick Terrell *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
2565*2aa14b1aSNick Terrell lastCountSize = stats.lastCountSize;
2566*2aa14b1aSNick Terrell op += stats.size;
2567*2aa14b1aSNick Terrell }
2568e0c1b49fSNick Terrell
2569e0c1b49fSNick Terrell { size_t const bitstreamSize = ZSTD_encodeSequences(
2570e0c1b49fSNick Terrell op, (size_t)(oend - op),
2571e0c1b49fSNick Terrell CTable_MatchLength, mlCodeTable,
2572e0c1b49fSNick Terrell CTable_OffsetBits, ofCodeTable,
2573e0c1b49fSNick Terrell CTable_LitLength, llCodeTable,
2574e0c1b49fSNick Terrell sequences, nbSeq,
2575e0c1b49fSNick Terrell longOffsets, bmi2);
2576e0c1b49fSNick Terrell FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
2577e0c1b49fSNick Terrell op += bitstreamSize;
2578e0c1b49fSNick Terrell assert(op <= oend);
2579e0c1b49fSNick Terrell /* zstd versions <= 1.3.4 mistakenly report corruption when
2580e0c1b49fSNick Terrell * FSE_readNCount() receives a buffer < 4 bytes.
2581e0c1b49fSNick Terrell * Fixed by https://github.com/facebook/zstd/pull/1146.
2582e0c1b49fSNick Terrell * This can happen when the last set_compressed table present is 2
2583e0c1b49fSNick Terrell * bytes and the bitstream is only one byte.
2584e0c1b49fSNick Terrell * In this exceedingly rare case, we will simply emit an uncompressed
2585e0c1b49fSNick Terrell * block, since it isn't worth optimizing.
2586e0c1b49fSNick Terrell */
2587*2aa14b1aSNick Terrell if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
2588*2aa14b1aSNick Terrell /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
2589*2aa14b1aSNick Terrell assert(lastCountSize + bitstreamSize == 3);
2590e0c1b49fSNick Terrell DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
2591e0c1b49fSNick Terrell "emitting an uncompressed block.");
2592e0c1b49fSNick Terrell return 0;
2593e0c1b49fSNick Terrell }
2594e0c1b49fSNick Terrell }
2595e0c1b49fSNick Terrell
2596e0c1b49fSNick Terrell DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
2597e0c1b49fSNick Terrell return (size_t)(op - ostart);
2598e0c1b49fSNick Terrell }
2599e0c1b49fSNick Terrell
2600e0c1b49fSNick Terrell MEM_STATIC size_t
ZSTD_entropyCompressSeqStore(seqStore_t * seqStorePtr,const ZSTD_entropyCTables_t * prevEntropy,ZSTD_entropyCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,void * dst,size_t dstCapacity,size_t srcSize,void * entropyWorkspace,size_t entropyWkspSize,int bmi2)2601*2aa14b1aSNick Terrell ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
2602e0c1b49fSNick Terrell const ZSTD_entropyCTables_t* prevEntropy,
2603e0c1b49fSNick Terrell ZSTD_entropyCTables_t* nextEntropy,
2604e0c1b49fSNick Terrell const ZSTD_CCtx_params* cctxParams,
2605e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
2606e0c1b49fSNick Terrell size_t srcSize,
2607e0c1b49fSNick Terrell void* entropyWorkspace, size_t entropyWkspSize,
2608e0c1b49fSNick Terrell int bmi2)
2609e0c1b49fSNick Terrell {
2610*2aa14b1aSNick Terrell size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
2611e0c1b49fSNick Terrell seqStorePtr, prevEntropy, nextEntropy, cctxParams,
2612e0c1b49fSNick Terrell dst, dstCapacity,
2613e0c1b49fSNick Terrell entropyWorkspace, entropyWkspSize, bmi2);
2614e0c1b49fSNick Terrell if (cSize == 0) return 0;
2615e0c1b49fSNick Terrell /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2616e0c1b49fSNick Terrell * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
2617e0c1b49fSNick Terrell */
2618e0c1b49fSNick Terrell if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
2619e0c1b49fSNick Terrell return 0; /* block not compressed */
2620*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
2621e0c1b49fSNick Terrell
2622e0c1b49fSNick Terrell /* Check compressibility */
2623e0c1b49fSNick Terrell { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
2624e0c1b49fSNick Terrell if (cSize >= maxCSize) return 0; /* block not compressed */
2625e0c1b49fSNick Terrell }
2626*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
2627e0c1b49fSNick Terrell return cSize;
2628e0c1b49fSNick Terrell }
2629e0c1b49fSNick Terrell
2630e0c1b49fSNick Terrell /* ZSTD_selectBlockCompressor() :
2631e0c1b49fSNick Terrell * Not static, but internal use only (used by long distance matcher)
2632e0c1b49fSNick Terrell * assumption : strat is a valid strategy */
ZSTD_selectBlockCompressor(ZSTD_strategy strat,ZSTD_paramSwitch_e useRowMatchFinder,ZSTD_dictMode_e dictMode)2633*2aa14b1aSNick Terrell ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
2634e0c1b49fSNick Terrell {
2635e0c1b49fSNick Terrell static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
2636e0c1b49fSNick Terrell { ZSTD_compressBlock_fast /* default for 0 */,
2637e0c1b49fSNick Terrell ZSTD_compressBlock_fast,
2638e0c1b49fSNick Terrell ZSTD_compressBlock_doubleFast,
2639e0c1b49fSNick Terrell ZSTD_compressBlock_greedy,
2640e0c1b49fSNick Terrell ZSTD_compressBlock_lazy,
2641e0c1b49fSNick Terrell ZSTD_compressBlock_lazy2,
2642e0c1b49fSNick Terrell ZSTD_compressBlock_btlazy2,
2643e0c1b49fSNick Terrell ZSTD_compressBlock_btopt,
2644e0c1b49fSNick Terrell ZSTD_compressBlock_btultra,
2645e0c1b49fSNick Terrell ZSTD_compressBlock_btultra2 },
2646e0c1b49fSNick Terrell { ZSTD_compressBlock_fast_extDict /* default for 0 */,
2647e0c1b49fSNick Terrell ZSTD_compressBlock_fast_extDict,
2648e0c1b49fSNick Terrell ZSTD_compressBlock_doubleFast_extDict,
2649e0c1b49fSNick Terrell ZSTD_compressBlock_greedy_extDict,
2650e0c1b49fSNick Terrell ZSTD_compressBlock_lazy_extDict,
2651e0c1b49fSNick Terrell ZSTD_compressBlock_lazy2_extDict,
2652e0c1b49fSNick Terrell ZSTD_compressBlock_btlazy2_extDict,
2653e0c1b49fSNick Terrell ZSTD_compressBlock_btopt_extDict,
2654e0c1b49fSNick Terrell ZSTD_compressBlock_btultra_extDict,
2655e0c1b49fSNick Terrell ZSTD_compressBlock_btultra_extDict },
2656e0c1b49fSNick Terrell { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
2657e0c1b49fSNick Terrell ZSTD_compressBlock_fast_dictMatchState,
2658e0c1b49fSNick Terrell ZSTD_compressBlock_doubleFast_dictMatchState,
2659e0c1b49fSNick Terrell ZSTD_compressBlock_greedy_dictMatchState,
2660e0c1b49fSNick Terrell ZSTD_compressBlock_lazy_dictMatchState,
2661e0c1b49fSNick Terrell ZSTD_compressBlock_lazy2_dictMatchState,
2662e0c1b49fSNick Terrell ZSTD_compressBlock_btlazy2_dictMatchState,
2663e0c1b49fSNick Terrell ZSTD_compressBlock_btopt_dictMatchState,
2664e0c1b49fSNick Terrell ZSTD_compressBlock_btultra_dictMatchState,
2665e0c1b49fSNick Terrell ZSTD_compressBlock_btultra_dictMatchState },
2666e0c1b49fSNick Terrell { NULL /* default for 0 */,
2667e0c1b49fSNick Terrell NULL,
2668e0c1b49fSNick Terrell NULL,
2669e0c1b49fSNick Terrell ZSTD_compressBlock_greedy_dedicatedDictSearch,
2670e0c1b49fSNick Terrell ZSTD_compressBlock_lazy_dedicatedDictSearch,
2671e0c1b49fSNick Terrell ZSTD_compressBlock_lazy2_dedicatedDictSearch,
2672e0c1b49fSNick Terrell NULL,
2673e0c1b49fSNick Terrell NULL,
2674e0c1b49fSNick Terrell NULL,
2675e0c1b49fSNick Terrell NULL }
2676e0c1b49fSNick Terrell };
2677e0c1b49fSNick Terrell ZSTD_blockCompressor selectedCompressor;
2678e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
2679e0c1b49fSNick Terrell
2680e0c1b49fSNick Terrell assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
2681*2aa14b1aSNick Terrell DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
2682*2aa14b1aSNick Terrell if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
2683*2aa14b1aSNick Terrell static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
2684*2aa14b1aSNick Terrell { ZSTD_compressBlock_greedy_row,
2685*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy_row,
2686*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy2_row },
2687*2aa14b1aSNick Terrell { ZSTD_compressBlock_greedy_extDict_row,
2688*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy_extDict_row,
2689*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy2_extDict_row },
2690*2aa14b1aSNick Terrell { ZSTD_compressBlock_greedy_dictMatchState_row,
2691*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy_dictMatchState_row,
2692*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy2_dictMatchState_row },
2693*2aa14b1aSNick Terrell { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
2694*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
2695*2aa14b1aSNick Terrell ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
2696*2aa14b1aSNick Terrell };
2697*2aa14b1aSNick Terrell DEBUGLOG(4, "Selecting a row-based matchfinder");
2698*2aa14b1aSNick Terrell assert(useRowMatchFinder != ZSTD_ps_auto);
2699*2aa14b1aSNick Terrell selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
2700*2aa14b1aSNick Terrell } else {
2701e0c1b49fSNick Terrell selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
2702*2aa14b1aSNick Terrell }
2703e0c1b49fSNick Terrell assert(selectedCompressor != NULL);
2704e0c1b49fSNick Terrell return selectedCompressor;
2705e0c1b49fSNick Terrell }
2706e0c1b49fSNick Terrell
/* ZSTD_storeLastLiterals() :
 * Appends `lastLLSize` bytes starting at `anchor` to the literals buffer of
 * `seqStorePtr`, and advances its write position accordingly. */
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    BYTE* const litDst = seqStorePtr->lit;
    ZSTD_memcpy(litDst, anchor, lastLLSize);
    seqStorePtr->lit = litDst + lastLLSize;
}
2713e0c1b49fSNick Terrell
/* ZSTD_resetSeqStore() :
 * Rewinds the sequence and literal write positions to the start of their
 * buffers and clears the long-length marker. Buffer contents are not touched. */
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->longLengthType = ZSTD_llt_none;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->lit = ssPtr->litStart;
}
2720e0c1b49fSNick Terrell
2721e0c1b49fSNick Terrell typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
2722e0c1b49fSNick Terrell
ZSTD_buildSeqStore(ZSTD_CCtx * zc,const void * src,size_t srcSize)2723e0c1b49fSNick Terrell static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
2724e0c1b49fSNick Terrell {
2725e0c1b49fSNick Terrell ZSTD_matchState_t* const ms = &zc->blockState.matchState;
2726e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
2727e0c1b49fSNick Terrell assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
2728e0c1b49fSNick Terrell /* Assert that we have correctly flushed the ctx params into the ms's copy */
2729e0c1b49fSNick Terrell ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
2730e0c1b49fSNick Terrell if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
2731e0c1b49fSNick Terrell if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
2732e0c1b49fSNick Terrell ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
2733e0c1b49fSNick Terrell } else {
2734e0c1b49fSNick Terrell ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
2735e0c1b49fSNick Terrell }
2736e0c1b49fSNick Terrell return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
2737e0c1b49fSNick Terrell }
2738e0c1b49fSNick Terrell ZSTD_resetSeqStore(&(zc->seqStore));
2739e0c1b49fSNick Terrell /* required for optimal parser to read stats from dictionary */
2740e0c1b49fSNick Terrell ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
2741e0c1b49fSNick Terrell /* tell the optimal parser how we expect to compress literals */
2742e0c1b49fSNick Terrell ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
2743e0c1b49fSNick Terrell /* a gap between an attached dict and the current window is not safe,
2744e0c1b49fSNick Terrell * they must remain adjacent,
2745e0c1b49fSNick Terrell * and when that stops being the case, the dict must be unset */
2746e0c1b49fSNick Terrell assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
2747e0c1b49fSNick Terrell
2748e0c1b49fSNick Terrell /* limited update after a very long match */
2749e0c1b49fSNick Terrell { const BYTE* const base = ms->window.base;
2750e0c1b49fSNick Terrell const BYTE* const istart = (const BYTE*)src;
2751e0c1b49fSNick Terrell const U32 curr = (U32)(istart-base);
2752e0c1b49fSNick Terrell if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
2753e0c1b49fSNick Terrell if (curr > ms->nextToUpdate + 384)
2754e0c1b49fSNick Terrell ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
2755e0c1b49fSNick Terrell }
2756e0c1b49fSNick Terrell
2757e0c1b49fSNick Terrell /* select and store sequences */
2758e0c1b49fSNick Terrell { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
2759e0c1b49fSNick Terrell size_t lastLLSize;
2760e0c1b49fSNick Terrell { int i;
2761e0c1b49fSNick Terrell for (i = 0; i < ZSTD_REP_NUM; ++i)
2762e0c1b49fSNick Terrell zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
2763e0c1b49fSNick Terrell }
2764e0c1b49fSNick Terrell if (zc->externSeqStore.pos < zc->externSeqStore.size) {
2765*2aa14b1aSNick Terrell assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
2766e0c1b49fSNick Terrell /* Updates ldmSeqStore.pos */
2767e0c1b49fSNick Terrell lastLLSize =
2768e0c1b49fSNick Terrell ZSTD_ldm_blockCompress(&zc->externSeqStore,
2769e0c1b49fSNick Terrell ms, &zc->seqStore,
2770e0c1b49fSNick Terrell zc->blockState.nextCBlock->rep,
2771*2aa14b1aSNick Terrell zc->appliedParams.useRowMatchFinder,
2772e0c1b49fSNick Terrell src, srcSize);
2773e0c1b49fSNick Terrell assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
2774*2aa14b1aSNick Terrell } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
2775e0c1b49fSNick Terrell rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
2776e0c1b49fSNick Terrell
2777e0c1b49fSNick Terrell ldmSeqStore.seq = zc->ldmSequences;
2778e0c1b49fSNick Terrell ldmSeqStore.capacity = zc->maxNbLdmSequences;
2779e0c1b49fSNick Terrell /* Updates ldmSeqStore.size */
2780e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
2781e0c1b49fSNick Terrell &zc->appliedParams.ldmParams,
2782e0c1b49fSNick Terrell src, srcSize), "");
2783e0c1b49fSNick Terrell /* Updates ldmSeqStore.pos */
2784e0c1b49fSNick Terrell lastLLSize =
2785e0c1b49fSNick Terrell ZSTD_ldm_blockCompress(&ldmSeqStore,
2786e0c1b49fSNick Terrell ms, &zc->seqStore,
2787e0c1b49fSNick Terrell zc->blockState.nextCBlock->rep,
2788*2aa14b1aSNick Terrell zc->appliedParams.useRowMatchFinder,
2789e0c1b49fSNick Terrell src, srcSize);
2790e0c1b49fSNick Terrell assert(ldmSeqStore.pos == ldmSeqStore.size);
2791e0c1b49fSNick Terrell } else { /* not long range mode */
2792*2aa14b1aSNick Terrell ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
2793*2aa14b1aSNick Terrell zc->appliedParams.useRowMatchFinder,
2794*2aa14b1aSNick Terrell dictMode);
2795e0c1b49fSNick Terrell ms->ldmSeqStore = NULL;
2796e0c1b49fSNick Terrell lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
2797e0c1b49fSNick Terrell }
2798e0c1b49fSNick Terrell { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
2799e0c1b49fSNick Terrell ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
2800e0c1b49fSNick Terrell } }
2801e0c1b49fSNick Terrell return ZSTDbss_compress;
2802e0c1b49fSNick Terrell }
2803e0c1b49fSNick Terrell
/* ZSTD_copyBlockSequences() :
 * Converts the sequences of the current block's seqStore into ZSTD_Sequence
 * entries, appended to zc->seqCollector at its current seqIndex.
 * One extra entry with matchLength == offset == 0 is appended at the end to
 * carry the block's last literals; seqIndex is advanced past it. */
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* const seqStore = ZSTD_getSeqStore(zc);
    const seqDef* const inSeqs = seqStore->sequencesStart;
    size_t const nbInSeqs = seqStore->sequences - inSeqs;
    size_t const totalLitSize = (size_t)(seqStore->lit - seqStore->litStart);
    ZSTD_Sequence* const outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t litConsumed = 0;
    repcodes_t repHistory;
    size_t n;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= nbInSeqs + 1);
    ZSTD_memcpy(repHistory.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));

    for (n = 0; n < nbInSeqs; ++n) {
        const seqDef* const in = inSeqs + n;
        ZSTD_Sequence* const out = outSeqs + n;
        /* provisional value, only meaningful when offBase does not denote a repcode */
        U32 rawOffset = in->offBase - ZSTD_REP_NUM;

        out->litLength = in->litLength;
        out->matchLength = in->mlBase + MINMATCH;
        out->rep = 0;

        /* at most one sequence per block carries a length extended by 0x10000 */
        if (n == seqStore->longLengthPos) {
            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                out->litLength += 0x10000;
            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                out->matchLength += 0x10000;
            }
        }

        if (in->offBase <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            unsigned const repCode = in->offBase;
            out->rep = repCode;
            if (out->litLength != 0) {
                rawOffset = repHistory.rep[repCode - 1];
            } else if (repCode == 3) {
                rawOffset = repHistory.rep[0] - 1;
            } else {
                rawOffset = repHistory.rep[repCode];
            }
        }
        out->offset = rawOffset;

        /* ZSTD_updateRep() is given offBase - 1 */
        ZSTD_updateRep(repHistory.rep,
                       in->offBase - 1,
                       in->litLength == 0);
        litConsumed += out->litLength;
    }

    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(totalLitSize >= litConsumed);
    {   ZSTD_Sequence* const tail = outSeqs + nbInSeqs;
        size_t const lastLLSize = totalLitSize - litConsumed;
        tail->litLength = (U32)lastLLSize;
        tail->matchLength = tail->offset = tail->rep = 0;
    }
    zc->seqCollector.seqIndex += nbInSeqs + 1;
}
2867e0c1b49fSNick Terrell
/* ZSTD_generateSequences() :
 * Compresses `src` with `zc` into a temporary buffer, with sequence collection
 * enabled so that sequences are recorded into `outSeqs` (capacity: `outSeqsSize`
 * entries). The compressed output itself is discarded.
 * @return : number of sequences written into outSeqs,
 *           or an error code (allocation failure, or any error reported by
 *           ZSTD_compress2()), which can be tested using ZSTD_isError(). */
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                              size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    /* Release the scratch buffer before testing the result, so that the error
     * path does not leak it. A failed compression must not be reported as a
     * valid (possibly partial) sequence count. */
    {   size_t const ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
        ZSTD_customFree(dst, ZSTD_defaultCMem);
        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
    }
    return zc->seqCollector.seqIndex;
}
2887e0c1b49fSNick Terrell
/* ZSTD_mergeBlockDelimiters() :
 * Removes, in place, every entry with offset == 0 and matchLength == 0 from
 * `sequences`. The literal length of a removed entry is added to the entry
 * that follows it; for the very last entry there is no follower and its
 * literal length is dropped.
 * @return : number of entries remaining at the front of `sequences`. */
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
    size_t readPos;
    size_t writePos = 0;
    for (readPos = 0; readPos < seqsSize; ++readPos) {
        const ZSTD_Sequence* const cur = &sequences[readPos];
        int const isDelimiter = (cur->offset == 0) && (cur->matchLength == 0);
        if (!isDelimiter) {
            sequences[writePos] = *cur;
            writePos++;
            continue;
        }
        if (readPos + 1 != seqsSize) {
            sequences[readPos+1].litLength += cur->litLength;
        }
    }
    return writePos;
}
2903e0c1b49fSNick Terrell
2904e0c1b49fSNick Terrell /* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
ZSTD_isRLE(const BYTE * src,size_t length)2905e0c1b49fSNick Terrell static int ZSTD_isRLE(const BYTE* src, size_t length) {
2906e0c1b49fSNick Terrell const BYTE* ip = src;
2907e0c1b49fSNick Terrell const BYTE value = ip[0];
2908e0c1b49fSNick Terrell const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
2909e0c1b49fSNick Terrell const size_t unrollSize = sizeof(size_t) * 4;
2910e0c1b49fSNick Terrell const size_t unrollMask = unrollSize - 1;
2911e0c1b49fSNick Terrell const size_t prefixLength = length & unrollMask;
2912e0c1b49fSNick Terrell size_t i;
2913e0c1b49fSNick Terrell size_t u;
2914e0c1b49fSNick Terrell if (length == 1) return 1;
2915e0c1b49fSNick Terrell /* Check if prefix is RLE first before using unrolled loop */
2916e0c1b49fSNick Terrell if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
2917e0c1b49fSNick Terrell return 0;
2918e0c1b49fSNick Terrell }
2919e0c1b49fSNick Terrell for (i = prefixLength; i != length; i += unrollSize) {
2920e0c1b49fSNick Terrell for (u = 0; u < unrollSize; u += sizeof(size_t)) {
2921e0c1b49fSNick Terrell if (MEM_readST(ip + i + u) != valueST) {
2922e0c1b49fSNick Terrell return 0;
2923e0c1b49fSNick Terrell }
2924e0c1b49fSNick Terrell }
2925e0c1b49fSNick Terrell }
2926e0c1b49fSNick Terrell return 1;
2927e0c1b49fSNick Terrell }
2928e0c1b49fSNick Terrell
2929e0c1b49fSNick Terrell /* Returns true if the given block may be RLE.
2930e0c1b49fSNick Terrell * This is just a heuristic based on the compressibility.
2931e0c1b49fSNick Terrell * It may return both false positives and false negatives.
2932e0c1b49fSNick Terrell */
ZSTD_maybeRLE(seqStore_t const * seqStore)2933e0c1b49fSNick Terrell static int ZSTD_maybeRLE(seqStore_t const* seqStore)
2934e0c1b49fSNick Terrell {
2935e0c1b49fSNick Terrell size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
2936e0c1b49fSNick Terrell size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
2937e0c1b49fSNick Terrell
2938e0c1b49fSNick Terrell return nbSeqs < 4 && nbLits < 10;
2939e0c1b49fSNick Terrell }
2940e0c1b49fSNick Terrell
/* ZSTD_blockState_confirmRepcodesAndEntropyTables() :
 * Swaps bs->prevCBlock and bs->nextCBlock, so that the state that was
 * `next` becomes `prev`, and the former `prev` becomes the new `next`. */
static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
    ZSTD_compressedBlockState_t* const confirmed = bs->nextCBlock;
    bs->nextCBlock = bs->prevCBlock;
    bs->prevCBlock = confirmed;
}
2947e0c1b49fSNick Terrell
2948*2aa14b1aSNick Terrell /* Writes the block header */
writeBlockHeader(void * op,size_t cSize,size_t blockSize,U32 lastBlock)2949*2aa14b1aSNick Terrell static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
2950*2aa14b1aSNick Terrell U32 const cBlockHeader = cSize == 1 ?
2951*2aa14b1aSNick Terrell lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
2952*2aa14b1aSNick Terrell lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2953*2aa14b1aSNick Terrell MEM_writeLE24(op, cBlockHeader);
2954*2aa14b1aSNick Terrell DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
2955*2aa14b1aSNick Terrell }
2956*2aa14b1aSNick Terrell
2957*2aa14b1aSNick Terrell /* ZSTD_buildBlockEntropyStats_literals() :
2958*2aa14b1aSNick Terrell * Builds entropy for the literals.
2959*2aa14b1aSNick Terrell * Stores literals block type (raw, rle, compressed, repeat) and
2960*2aa14b1aSNick Terrell * huffman description table to hufMetadata.
2961*2aa14b1aSNick Terrell * Requires ENTROPY_WORKSPACE_SIZE workspace
2962*2aa14b1aSNick Terrell * @return : size of huffman description table or error code */
ZSTD_buildBlockEntropyStats_literals(void * const src,size_t srcSize,const ZSTD_hufCTables_t * prevHuf,ZSTD_hufCTables_t * nextHuf,ZSTD_hufCTablesMetadata_t * hufMetadata,const int literalsCompressionIsDisabled,void * workspace,size_t wkspSize)2963*2aa14b1aSNick Terrell static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
2964*2aa14b1aSNick Terrell const ZSTD_hufCTables_t* prevHuf,
2965*2aa14b1aSNick Terrell ZSTD_hufCTables_t* nextHuf,
2966*2aa14b1aSNick Terrell ZSTD_hufCTablesMetadata_t* hufMetadata,
2967*2aa14b1aSNick Terrell const int literalsCompressionIsDisabled,
2968*2aa14b1aSNick Terrell void* workspace, size_t wkspSize)
2969*2aa14b1aSNick Terrell {
2970*2aa14b1aSNick Terrell BYTE* const wkspStart = (BYTE*)workspace;
2971*2aa14b1aSNick Terrell BYTE* const wkspEnd = wkspStart + wkspSize;
2972*2aa14b1aSNick Terrell BYTE* const countWkspStart = wkspStart;
2973*2aa14b1aSNick Terrell unsigned* const countWksp = (unsigned*)workspace;
2974*2aa14b1aSNick Terrell const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
2975*2aa14b1aSNick Terrell BYTE* const nodeWksp = countWkspStart + countWkspSize;
2976*2aa14b1aSNick Terrell const size_t nodeWkspSize = wkspEnd-nodeWksp;
2977*2aa14b1aSNick Terrell unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
2978*2aa14b1aSNick Terrell unsigned huffLog = HUF_TABLELOG_DEFAULT;
2979*2aa14b1aSNick Terrell HUF_repeat repeat = prevHuf->repeatMode;
2980*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
2981*2aa14b1aSNick Terrell
2982*2aa14b1aSNick Terrell /* Prepare nextEntropy assuming reusing the existing table */
2983*2aa14b1aSNick Terrell ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
2984*2aa14b1aSNick Terrell
2985*2aa14b1aSNick Terrell if (literalsCompressionIsDisabled) {
2986*2aa14b1aSNick Terrell DEBUGLOG(5, "set_basic - disabled");
2987*2aa14b1aSNick Terrell hufMetadata->hType = set_basic;
2988*2aa14b1aSNick Terrell return 0;
2989*2aa14b1aSNick Terrell }
2990*2aa14b1aSNick Terrell
2991*2aa14b1aSNick Terrell /* small ? don't even attempt compression (speed opt) */
2992*2aa14b1aSNick Terrell #ifndef COMPRESS_LITERALS_SIZE_MIN
2993*2aa14b1aSNick Terrell #define COMPRESS_LITERALS_SIZE_MIN 63
2994*2aa14b1aSNick Terrell #endif
2995*2aa14b1aSNick Terrell { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
2996*2aa14b1aSNick Terrell if (srcSize <= minLitSize) {
2997*2aa14b1aSNick Terrell DEBUGLOG(5, "set_basic - too small");
2998*2aa14b1aSNick Terrell hufMetadata->hType = set_basic;
2999*2aa14b1aSNick Terrell return 0;
3000*2aa14b1aSNick Terrell }
3001*2aa14b1aSNick Terrell }
3002*2aa14b1aSNick Terrell
3003*2aa14b1aSNick Terrell /* Scan input and build symbol stats */
3004*2aa14b1aSNick Terrell { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
3005*2aa14b1aSNick Terrell FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
3006*2aa14b1aSNick Terrell if (largest == srcSize) {
3007*2aa14b1aSNick Terrell DEBUGLOG(5, "set_rle");
3008*2aa14b1aSNick Terrell hufMetadata->hType = set_rle;
3009*2aa14b1aSNick Terrell return 0;
3010*2aa14b1aSNick Terrell }
3011*2aa14b1aSNick Terrell if (largest <= (srcSize >> 7)+4) {
3012*2aa14b1aSNick Terrell DEBUGLOG(5, "set_basic - no gain");
3013*2aa14b1aSNick Terrell hufMetadata->hType = set_basic;
3014*2aa14b1aSNick Terrell return 0;
3015*2aa14b1aSNick Terrell }
3016*2aa14b1aSNick Terrell }
3017*2aa14b1aSNick Terrell
3018*2aa14b1aSNick Terrell /* Validate the previous Huffman table */
3019*2aa14b1aSNick Terrell if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
3020*2aa14b1aSNick Terrell repeat = HUF_repeat_none;
3021*2aa14b1aSNick Terrell }
3022*2aa14b1aSNick Terrell
3023*2aa14b1aSNick Terrell /* Build Huffman Tree */
3024*2aa14b1aSNick Terrell ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
3025*2aa14b1aSNick Terrell huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
3026*2aa14b1aSNick Terrell { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
3027*2aa14b1aSNick Terrell maxSymbolValue, huffLog,
3028*2aa14b1aSNick Terrell nodeWksp, nodeWkspSize);
3029*2aa14b1aSNick Terrell FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
3030*2aa14b1aSNick Terrell huffLog = (U32)maxBits;
3031*2aa14b1aSNick Terrell { /* Build and write the CTable */
3032*2aa14b1aSNick Terrell size_t const newCSize = HUF_estimateCompressedSize(
3033*2aa14b1aSNick Terrell (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
3034*2aa14b1aSNick Terrell size_t const hSize = HUF_writeCTable_wksp(
3035*2aa14b1aSNick Terrell hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
3036*2aa14b1aSNick Terrell (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
3037*2aa14b1aSNick Terrell nodeWksp, nodeWkspSize);
3038*2aa14b1aSNick Terrell /* Check against repeating the previous CTable */
3039*2aa14b1aSNick Terrell if (repeat != HUF_repeat_none) {
3040*2aa14b1aSNick Terrell size_t const oldCSize = HUF_estimateCompressedSize(
3041*2aa14b1aSNick Terrell (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
3042*2aa14b1aSNick Terrell if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
3043*2aa14b1aSNick Terrell DEBUGLOG(5, "set_repeat - smaller");
3044*2aa14b1aSNick Terrell ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
3045*2aa14b1aSNick Terrell hufMetadata->hType = set_repeat;
3046*2aa14b1aSNick Terrell return 0;
3047*2aa14b1aSNick Terrell }
3048*2aa14b1aSNick Terrell }
3049*2aa14b1aSNick Terrell if (newCSize + hSize >= srcSize) {
3050*2aa14b1aSNick Terrell DEBUGLOG(5, "set_basic - no gains");
3051*2aa14b1aSNick Terrell ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
3052*2aa14b1aSNick Terrell hufMetadata->hType = set_basic;
3053*2aa14b1aSNick Terrell return 0;
3054*2aa14b1aSNick Terrell }
3055*2aa14b1aSNick Terrell DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
3056*2aa14b1aSNick Terrell hufMetadata->hType = set_compressed;
3057*2aa14b1aSNick Terrell nextHuf->repeatMode = HUF_repeat_check;
3058*2aa14b1aSNick Terrell return hSize;
3059*2aa14b1aSNick Terrell }
3060*2aa14b1aSNick Terrell }
3061*2aa14b1aSNick Terrell }
3062*2aa14b1aSNick Terrell
3063*2aa14b1aSNick Terrell
3064*2aa14b1aSNick Terrell /* ZSTD_buildDummySequencesStatistics():
3065*2aa14b1aSNick Terrell * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
3066*2aa14b1aSNick Terrell * and updates nextEntropy to the appropriate repeatMode.
3067*2aa14b1aSNick Terrell */
3068*2aa14b1aSNick Terrell static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t * nextEntropy)3069*2aa14b1aSNick Terrell ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
3070*2aa14b1aSNick Terrell ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
3071*2aa14b1aSNick Terrell nextEntropy->litlength_repeatMode = FSE_repeat_none;
3072*2aa14b1aSNick Terrell nextEntropy->offcode_repeatMode = FSE_repeat_none;
3073*2aa14b1aSNick Terrell nextEntropy->matchlength_repeatMode = FSE_repeat_none;
3074*2aa14b1aSNick Terrell return stats;
3075*2aa14b1aSNick Terrell }
3076*2aa14b1aSNick Terrell
3077*2aa14b1aSNick Terrell /* ZSTD_buildBlockEntropyStats_sequences() :
3078*2aa14b1aSNick Terrell * Builds entropy for the sequences.
3079*2aa14b1aSNick Terrell * Stores symbol compression modes and fse table to fseMetadata.
3080*2aa14b1aSNick Terrell * Requires ENTROPY_WORKSPACE_SIZE wksp.
3081*2aa14b1aSNick Terrell * @return : size of fse tables or error code */
ZSTD_buildBlockEntropyStats_sequences(seqStore_t * seqStorePtr,const ZSTD_fseCTables_t * prevEntropy,ZSTD_fseCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,ZSTD_fseCTablesMetadata_t * fseMetadata,void * workspace,size_t wkspSize)3082*2aa14b1aSNick Terrell static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
3083*2aa14b1aSNick Terrell const ZSTD_fseCTables_t* prevEntropy,
3084*2aa14b1aSNick Terrell ZSTD_fseCTables_t* nextEntropy,
3085*2aa14b1aSNick Terrell const ZSTD_CCtx_params* cctxParams,
3086*2aa14b1aSNick Terrell ZSTD_fseCTablesMetadata_t* fseMetadata,
3087*2aa14b1aSNick Terrell void* workspace, size_t wkspSize)
3088*2aa14b1aSNick Terrell {
3089*2aa14b1aSNick Terrell ZSTD_strategy const strategy = cctxParams->cParams.strategy;
3090*2aa14b1aSNick Terrell size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
3091*2aa14b1aSNick Terrell BYTE* const ostart = fseMetadata->fseTablesBuffer;
3092*2aa14b1aSNick Terrell BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
3093*2aa14b1aSNick Terrell BYTE* op = ostart;
3094*2aa14b1aSNick Terrell unsigned* countWorkspace = (unsigned*)workspace;
3095*2aa14b1aSNick Terrell unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
3096*2aa14b1aSNick Terrell size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
3097*2aa14b1aSNick Terrell ZSTD_symbolEncodingTypeStats_t stats;
3098*2aa14b1aSNick Terrell
3099*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
3100*2aa14b1aSNick Terrell stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
3101*2aa14b1aSNick Terrell prevEntropy, nextEntropy, op, oend,
3102*2aa14b1aSNick Terrell strategy, countWorkspace,
3103*2aa14b1aSNick Terrell entropyWorkspace, entropyWorkspaceSize)
3104*2aa14b1aSNick Terrell : ZSTD_buildDummySequencesStatistics(nextEntropy);
3105*2aa14b1aSNick Terrell FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
3106*2aa14b1aSNick Terrell fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
3107*2aa14b1aSNick Terrell fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
3108*2aa14b1aSNick Terrell fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
3109*2aa14b1aSNick Terrell fseMetadata->lastCountSize = stats.lastCountSize;
3110*2aa14b1aSNick Terrell return stats.size;
3111*2aa14b1aSNick Terrell }
3112*2aa14b1aSNick Terrell
3113*2aa14b1aSNick Terrell
3114*2aa14b1aSNick Terrell /* ZSTD_buildBlockEntropyStats() :
3115*2aa14b1aSNick Terrell * Builds entropy for the block.
3116*2aa14b1aSNick Terrell * Requires workspace size ENTROPY_WORKSPACE_SIZE
3117*2aa14b1aSNick Terrell *
3118*2aa14b1aSNick Terrell * @return : 0 on success or error code
3119*2aa14b1aSNick Terrell */
ZSTD_buildBlockEntropyStats(seqStore_t * seqStorePtr,const ZSTD_entropyCTables_t * prevEntropy,ZSTD_entropyCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,ZSTD_entropyCTablesMetadata_t * entropyMetadata,void * workspace,size_t wkspSize)3120*2aa14b1aSNick Terrell size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
3121*2aa14b1aSNick Terrell const ZSTD_entropyCTables_t* prevEntropy,
3122*2aa14b1aSNick Terrell ZSTD_entropyCTables_t* nextEntropy,
3123*2aa14b1aSNick Terrell const ZSTD_CCtx_params* cctxParams,
3124*2aa14b1aSNick Terrell ZSTD_entropyCTablesMetadata_t* entropyMetadata,
3125*2aa14b1aSNick Terrell void* workspace, size_t wkspSize)
3126*2aa14b1aSNick Terrell {
3127*2aa14b1aSNick Terrell size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
3128*2aa14b1aSNick Terrell entropyMetadata->hufMetadata.hufDesSize =
3129*2aa14b1aSNick Terrell ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
3130*2aa14b1aSNick Terrell &prevEntropy->huf, &nextEntropy->huf,
3131*2aa14b1aSNick Terrell &entropyMetadata->hufMetadata,
3132*2aa14b1aSNick Terrell ZSTD_literalsCompressionIsDisabled(cctxParams),
3133*2aa14b1aSNick Terrell workspace, wkspSize);
3134*2aa14b1aSNick Terrell FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
3135*2aa14b1aSNick Terrell entropyMetadata->fseMetadata.fseTablesSize =
3136*2aa14b1aSNick Terrell ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
3137*2aa14b1aSNick Terrell &prevEntropy->fse, &nextEntropy->fse,
3138*2aa14b1aSNick Terrell cctxParams,
3139*2aa14b1aSNick Terrell &entropyMetadata->fseMetadata,
3140*2aa14b1aSNick Terrell workspace, wkspSize);
3141*2aa14b1aSNick Terrell FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
3142*2aa14b1aSNick Terrell return 0;
3143*2aa14b1aSNick Terrell }
3144*2aa14b1aSNick Terrell
3145*2aa14b1aSNick Terrell /* Returns the size estimate for the literals section (header + content) of a block */
ZSTD_estimateBlockSize_literal(const BYTE * literals,size_t litSize,const ZSTD_hufCTables_t * huf,const ZSTD_hufCTablesMetadata_t * hufMetadata,void * workspace,size_t wkspSize,int writeEntropy)3146*2aa14b1aSNick Terrell static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
3147*2aa14b1aSNick Terrell const ZSTD_hufCTables_t* huf,
3148*2aa14b1aSNick Terrell const ZSTD_hufCTablesMetadata_t* hufMetadata,
3149*2aa14b1aSNick Terrell void* workspace, size_t wkspSize,
3150*2aa14b1aSNick Terrell int writeEntropy)
3151*2aa14b1aSNick Terrell {
3152*2aa14b1aSNick Terrell unsigned* const countWksp = (unsigned*)workspace;
3153*2aa14b1aSNick Terrell unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
3154*2aa14b1aSNick Terrell size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
3155*2aa14b1aSNick Terrell U32 singleStream = litSize < 256;
3156*2aa14b1aSNick Terrell
3157*2aa14b1aSNick Terrell if (hufMetadata->hType == set_basic) return litSize;
3158*2aa14b1aSNick Terrell else if (hufMetadata->hType == set_rle) return 1;
3159*2aa14b1aSNick Terrell else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
3160*2aa14b1aSNick Terrell size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
3161*2aa14b1aSNick Terrell if (ZSTD_isError(largest)) return litSize;
3162*2aa14b1aSNick Terrell { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
3163*2aa14b1aSNick Terrell if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
3164*2aa14b1aSNick Terrell if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
3165*2aa14b1aSNick Terrell return cLitSizeEstimate + literalSectionHeaderSize;
3166*2aa14b1aSNick Terrell } }
3167*2aa14b1aSNick Terrell assert(0); /* impossible */
3168*2aa14b1aSNick Terrell return 0;
3169*2aa14b1aSNick Terrell }
3170*2aa14b1aSNick Terrell
3171*2aa14b1aSNick Terrell /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,const BYTE * codeTable,size_t nbSeq,unsigned maxCode,const FSE_CTable * fseCTable,const U8 * additionalBits,short const * defaultNorm,U32 defaultNormLog,U32 defaultMax,void * workspace,size_t wkspSize)3172*2aa14b1aSNick Terrell static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
3173*2aa14b1aSNick Terrell const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
3174*2aa14b1aSNick Terrell const FSE_CTable* fseCTable,
3175*2aa14b1aSNick Terrell const U8* additionalBits,
3176*2aa14b1aSNick Terrell short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
3177*2aa14b1aSNick Terrell void* workspace, size_t wkspSize)
3178*2aa14b1aSNick Terrell {
3179*2aa14b1aSNick Terrell unsigned* const countWksp = (unsigned*)workspace;
3180*2aa14b1aSNick Terrell const BYTE* ctp = codeTable;
3181*2aa14b1aSNick Terrell const BYTE* const ctStart = ctp;
3182*2aa14b1aSNick Terrell const BYTE* const ctEnd = ctStart + nbSeq;
3183*2aa14b1aSNick Terrell size_t cSymbolTypeSizeEstimateInBits = 0;
3184*2aa14b1aSNick Terrell unsigned max = maxCode;
3185*2aa14b1aSNick Terrell
3186*2aa14b1aSNick Terrell HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
3187*2aa14b1aSNick Terrell if (type == set_basic) {
3188*2aa14b1aSNick Terrell /* We selected this encoding type, so it must be valid. */
3189*2aa14b1aSNick Terrell assert(max <= defaultMax);
3190*2aa14b1aSNick Terrell (void)defaultMax;
3191*2aa14b1aSNick Terrell cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
3192*2aa14b1aSNick Terrell } else if (type == set_rle) {
3193*2aa14b1aSNick Terrell cSymbolTypeSizeEstimateInBits = 0;
3194*2aa14b1aSNick Terrell } else if (type == set_compressed || type == set_repeat) {
3195*2aa14b1aSNick Terrell cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
3196*2aa14b1aSNick Terrell }
3197*2aa14b1aSNick Terrell if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
3198*2aa14b1aSNick Terrell return nbSeq * 10;
3199*2aa14b1aSNick Terrell }
3200*2aa14b1aSNick Terrell while (ctp < ctEnd) {
3201*2aa14b1aSNick Terrell if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
3202*2aa14b1aSNick Terrell else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
3203*2aa14b1aSNick Terrell ctp++;
3204*2aa14b1aSNick Terrell }
3205*2aa14b1aSNick Terrell return cSymbolTypeSizeEstimateInBits >> 3;
3206*2aa14b1aSNick Terrell }
3207*2aa14b1aSNick Terrell
3208*2aa14b1aSNick Terrell /* Returns the size estimate for the sequences section (header + content) of a block */
ZSTD_estimateBlockSize_sequences(const BYTE * ofCodeTable,const BYTE * llCodeTable,const BYTE * mlCodeTable,size_t nbSeq,const ZSTD_fseCTables_t * fseTables,const ZSTD_fseCTablesMetadata_t * fseMetadata,void * workspace,size_t wkspSize,int writeEntropy)3209*2aa14b1aSNick Terrell static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
3210*2aa14b1aSNick Terrell const BYTE* llCodeTable,
3211*2aa14b1aSNick Terrell const BYTE* mlCodeTable,
3212*2aa14b1aSNick Terrell size_t nbSeq,
3213*2aa14b1aSNick Terrell const ZSTD_fseCTables_t* fseTables,
3214*2aa14b1aSNick Terrell const ZSTD_fseCTablesMetadata_t* fseMetadata,
3215*2aa14b1aSNick Terrell void* workspace, size_t wkspSize,
3216*2aa14b1aSNick Terrell int writeEntropy)
3217*2aa14b1aSNick Terrell {
3218*2aa14b1aSNick Terrell size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
3219*2aa14b1aSNick Terrell size_t cSeqSizeEstimate = 0;
3220*2aa14b1aSNick Terrell cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
3221*2aa14b1aSNick Terrell fseTables->offcodeCTable, NULL,
3222*2aa14b1aSNick Terrell OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
3223*2aa14b1aSNick Terrell workspace, wkspSize);
3224*2aa14b1aSNick Terrell cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
3225*2aa14b1aSNick Terrell fseTables->litlengthCTable, LL_bits,
3226*2aa14b1aSNick Terrell LL_defaultNorm, LL_defaultNormLog, MaxLL,
3227*2aa14b1aSNick Terrell workspace, wkspSize);
3228*2aa14b1aSNick Terrell cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
3229*2aa14b1aSNick Terrell fseTables->matchlengthCTable, ML_bits,
3230*2aa14b1aSNick Terrell ML_defaultNorm, ML_defaultNormLog, MaxML,
3231*2aa14b1aSNick Terrell workspace, wkspSize);
3232*2aa14b1aSNick Terrell if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
3233*2aa14b1aSNick Terrell return cSeqSizeEstimate + sequencesSectionHeaderSize;
3234*2aa14b1aSNick Terrell }
3235*2aa14b1aSNick Terrell
3236*2aa14b1aSNick Terrell /* Returns the size estimate for a given stream of literals, of, ll, ml */
ZSTD_estimateBlockSize(const BYTE * literals,size_t litSize,const BYTE * ofCodeTable,const BYTE * llCodeTable,const BYTE * mlCodeTable,size_t nbSeq,const ZSTD_entropyCTables_t * entropy,const ZSTD_entropyCTablesMetadata_t * entropyMetadata,void * workspace,size_t wkspSize,int writeLitEntropy,int writeSeqEntropy)3237*2aa14b1aSNick Terrell static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
3238*2aa14b1aSNick Terrell const BYTE* ofCodeTable,
3239*2aa14b1aSNick Terrell const BYTE* llCodeTable,
3240*2aa14b1aSNick Terrell const BYTE* mlCodeTable,
3241*2aa14b1aSNick Terrell size_t nbSeq,
3242*2aa14b1aSNick Terrell const ZSTD_entropyCTables_t* entropy,
3243*2aa14b1aSNick Terrell const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
3244*2aa14b1aSNick Terrell void* workspace, size_t wkspSize,
3245*2aa14b1aSNick Terrell int writeLitEntropy, int writeSeqEntropy) {
3246*2aa14b1aSNick Terrell size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
3247*2aa14b1aSNick Terrell &entropy->huf, &entropyMetadata->hufMetadata,
3248*2aa14b1aSNick Terrell workspace, wkspSize, writeLitEntropy);
3249*2aa14b1aSNick Terrell size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
3250*2aa14b1aSNick Terrell nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
3251*2aa14b1aSNick Terrell workspace, wkspSize, writeSeqEntropy);
3252*2aa14b1aSNick Terrell return seqSize + literalsSize + ZSTD_blockHeaderSize;
3253*2aa14b1aSNick Terrell }
3254*2aa14b1aSNick Terrell
3255*2aa14b1aSNick Terrell /* Builds entropy statistics and uses them for blocksize estimation.
3256*2aa14b1aSNick Terrell *
3257*2aa14b1aSNick Terrell * Returns the estimated compressed size of the seqStore, or a zstd error.
3258*2aa14b1aSNick Terrell */
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t * seqStore,ZSTD_CCtx * zc)3259*2aa14b1aSNick Terrell static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
3260*2aa14b1aSNick Terrell ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
3261*2aa14b1aSNick Terrell DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
3262*2aa14b1aSNick Terrell FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
3263*2aa14b1aSNick Terrell &zc->blockState.prevCBlock->entropy,
3264*2aa14b1aSNick Terrell &zc->blockState.nextCBlock->entropy,
3265*2aa14b1aSNick Terrell &zc->appliedParams,
3266*2aa14b1aSNick Terrell entropyMetadata,
3267*2aa14b1aSNick Terrell zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
3268*2aa14b1aSNick Terrell return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
3269*2aa14b1aSNick Terrell seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
3270*2aa14b1aSNick Terrell (size_t)(seqStore->sequences - seqStore->sequencesStart),
3271*2aa14b1aSNick Terrell &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
3272*2aa14b1aSNick Terrell (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
3273*2aa14b1aSNick Terrell }
3274*2aa14b1aSNick Terrell
3275*2aa14b1aSNick Terrell /* Returns literals bytes represented in a seqStore */
ZSTD_countSeqStoreLiteralsBytes(const seqStore_t * const seqStore)3276*2aa14b1aSNick Terrell static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
3277*2aa14b1aSNick Terrell size_t literalsBytes = 0;
3278*2aa14b1aSNick Terrell size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
3279*2aa14b1aSNick Terrell size_t i;
3280*2aa14b1aSNick Terrell for (i = 0; i < nbSeqs; ++i) {
3281*2aa14b1aSNick Terrell seqDef seq = seqStore->sequencesStart[i];
3282*2aa14b1aSNick Terrell literalsBytes += seq.litLength;
3283*2aa14b1aSNick Terrell if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
3284*2aa14b1aSNick Terrell literalsBytes += 0x10000;
3285*2aa14b1aSNick Terrell }
3286*2aa14b1aSNick Terrell }
3287*2aa14b1aSNick Terrell return literalsBytes;
3288*2aa14b1aSNick Terrell }
3289*2aa14b1aSNick Terrell
3290*2aa14b1aSNick Terrell /* Returns match bytes represented in a seqStore */
ZSTD_countSeqStoreMatchBytes(const seqStore_t * const seqStore)3291*2aa14b1aSNick Terrell static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
3292*2aa14b1aSNick Terrell size_t matchBytes = 0;
3293*2aa14b1aSNick Terrell size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
3294*2aa14b1aSNick Terrell size_t i;
3295*2aa14b1aSNick Terrell for (i = 0; i < nbSeqs; ++i) {
3296*2aa14b1aSNick Terrell seqDef seq = seqStore->sequencesStart[i];
3297*2aa14b1aSNick Terrell matchBytes += seq.mlBase + MINMATCH;
3298*2aa14b1aSNick Terrell if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
3299*2aa14b1aSNick Terrell matchBytes += 0x10000;
3300*2aa14b1aSNick Terrell }
3301*2aa14b1aSNick Terrell }
3302*2aa14b1aSNick Terrell return matchBytes;
3303*2aa14b1aSNick Terrell }
3304*2aa14b1aSNick Terrell
3305*2aa14b1aSNick Terrell /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
3306*2aa14b1aSNick Terrell * Stores the result in resultSeqStore.
3307*2aa14b1aSNick Terrell */
ZSTD_deriveSeqStoreChunk(seqStore_t * resultSeqStore,const seqStore_t * originalSeqStore,size_t startIdx,size_t endIdx)3308*2aa14b1aSNick Terrell static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
3309*2aa14b1aSNick Terrell const seqStore_t* originalSeqStore,
3310*2aa14b1aSNick Terrell size_t startIdx, size_t endIdx) {
3311*2aa14b1aSNick Terrell BYTE* const litEnd = originalSeqStore->lit;
3312*2aa14b1aSNick Terrell size_t literalsBytes;
3313*2aa14b1aSNick Terrell size_t literalsBytesPreceding = 0;
3314*2aa14b1aSNick Terrell
3315*2aa14b1aSNick Terrell *resultSeqStore = *originalSeqStore;
3316*2aa14b1aSNick Terrell if (startIdx > 0) {
3317*2aa14b1aSNick Terrell resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
3318*2aa14b1aSNick Terrell literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
3319*2aa14b1aSNick Terrell }
3320*2aa14b1aSNick Terrell
3321*2aa14b1aSNick Terrell /* Move longLengthPos into the correct position if necessary */
3322*2aa14b1aSNick Terrell if (originalSeqStore->longLengthType != ZSTD_llt_none) {
3323*2aa14b1aSNick Terrell if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
3324*2aa14b1aSNick Terrell resultSeqStore->longLengthType = ZSTD_llt_none;
3325*2aa14b1aSNick Terrell } else {
3326*2aa14b1aSNick Terrell resultSeqStore->longLengthPos -= (U32)startIdx;
3327*2aa14b1aSNick Terrell }
3328*2aa14b1aSNick Terrell }
3329*2aa14b1aSNick Terrell resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
3330*2aa14b1aSNick Terrell resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
3331*2aa14b1aSNick Terrell literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
3332*2aa14b1aSNick Terrell resultSeqStore->litStart += literalsBytesPreceding;
3333*2aa14b1aSNick Terrell if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
3334*2aa14b1aSNick Terrell /* This accounts for possible last literals if the derived chunk reaches the end of the block */
3335*2aa14b1aSNick Terrell resultSeqStore->lit = litEnd;
3336*2aa14b1aSNick Terrell } else {
3337*2aa14b1aSNick Terrell resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
3338*2aa14b1aSNick Terrell }
3339*2aa14b1aSNick Terrell resultSeqStore->llCode += startIdx;
3340*2aa14b1aSNick Terrell resultSeqStore->mlCode += startIdx;
3341*2aa14b1aSNick Terrell resultSeqStore->ofCode += startIdx;
3342*2aa14b1aSNick Terrell }
3343*2aa14b1aSNick Terrell
3344*2aa14b1aSNick Terrell /*
3345*2aa14b1aSNick Terrell * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
3346*2aa14b1aSNick Terrell * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
3347*2aa14b1aSNick Terrell */
3348*2aa14b1aSNick Terrell static U32
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM],const U32 offCode,const U32 ll0)3349*2aa14b1aSNick Terrell ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
3350*2aa14b1aSNick Terrell {
3351*2aa14b1aSNick Terrell U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */
3352*2aa14b1aSNick Terrell assert(STORED_IS_REPCODE(offCode));
3353*2aa14b1aSNick Terrell if (adjustedOffCode == ZSTD_REP_NUM) {
3354*2aa14b1aSNick Terrell /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
3355*2aa14b1aSNick Terrell assert(rep[0] > 0);
3356*2aa14b1aSNick Terrell return rep[0] - 1;
3357*2aa14b1aSNick Terrell }
3358*2aa14b1aSNick Terrell return rep[adjustedOffCode];
3359*2aa14b1aSNick Terrell }
3360*2aa14b1aSNick Terrell
3361*2aa14b1aSNick Terrell /*
3362*2aa14b1aSNick Terrell * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
3363*2aa14b1aSNick Terrell * due to emission of RLE/raw blocks that disturb the offset history,
3364*2aa14b1aSNick Terrell * and replaces any repcodes within the seqStore that may be invalid.
3365*2aa14b1aSNick Terrell *
3366*2aa14b1aSNick Terrell * dRepcodes are updated as would be on the decompression side.
3367*2aa14b1aSNick Terrell * cRepcodes are updated exactly in accordance with the seqStore.
3368*2aa14b1aSNick Terrell *
3369*2aa14b1aSNick Terrell * Note : this function assumes seq->offBase respects the following numbering scheme :
3370*2aa14b1aSNick Terrell * 0 : invalid
3371*2aa14b1aSNick Terrell * 1-3 : repcode 1-3
3372*2aa14b1aSNick Terrell * 4+ : real_offset+3
3373*2aa14b1aSNick Terrell */
ZSTD_seqStore_resolveOffCodes(repcodes_t * const dRepcodes,repcodes_t * const cRepcodes,seqStore_t * const seqStore,U32 const nbSeq)3374*2aa14b1aSNick Terrell static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
3375*2aa14b1aSNick Terrell seqStore_t* const seqStore, U32 const nbSeq) {
3376*2aa14b1aSNick Terrell U32 idx = 0;
3377*2aa14b1aSNick Terrell for (; idx < nbSeq; ++idx) {
3378*2aa14b1aSNick Terrell seqDef* const seq = seqStore->sequencesStart + idx;
3379*2aa14b1aSNick Terrell U32 const ll0 = (seq->litLength == 0);
3380*2aa14b1aSNick Terrell U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
3381*2aa14b1aSNick Terrell assert(seq->offBase > 0);
3382*2aa14b1aSNick Terrell if (STORED_IS_REPCODE(offCode)) {
3383*2aa14b1aSNick Terrell U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
3384*2aa14b1aSNick Terrell U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
3385*2aa14b1aSNick Terrell /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
3386*2aa14b1aSNick Terrell * the repcode with the offset it actually references, determined by the compression
3387*2aa14b1aSNick Terrell * repcode history.
3388*2aa14b1aSNick Terrell */
3389*2aa14b1aSNick Terrell if (dRawOffset != cRawOffset) {
3390*2aa14b1aSNick Terrell seq->offBase = cRawOffset + ZSTD_REP_NUM;
3391*2aa14b1aSNick Terrell }
3392*2aa14b1aSNick Terrell }
3393*2aa14b1aSNick Terrell /* Compression repcode history is always updated with values directly from the unmodified seqStore.
3394*2aa14b1aSNick Terrell * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
3395*2aa14b1aSNick Terrell */
3396*2aa14b1aSNick Terrell ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
3397*2aa14b1aSNick Terrell ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
3398*2aa14b1aSNick Terrell }
3399*2aa14b1aSNick Terrell }
3400*2aa14b1aSNick Terrell
3401*2aa14b1aSNick Terrell /* ZSTD_compressSeqStore_singleBlock():
3402*2aa14b1aSNick Terrell * Compresses a seqStore into a block with a block header, into the buffer dst.
3403*2aa14b1aSNick Terrell *
3404*2aa14b1aSNick Terrell * Returns the total size of that block (including header) or a ZSTD error code.
3405*2aa14b1aSNick Terrell */
3406*2aa14b1aSNick Terrell static size_t
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx * zc,seqStore_t * const seqStore,repcodes_t * const dRep,repcodes_t * const cRep,void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock,U32 isPartition)3407*2aa14b1aSNick Terrell ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
3408*2aa14b1aSNick Terrell repcodes_t* const dRep, repcodes_t* const cRep,
3409*2aa14b1aSNick Terrell void* dst, size_t dstCapacity,
3410*2aa14b1aSNick Terrell const void* src, size_t srcSize,
3411*2aa14b1aSNick Terrell U32 lastBlock, U32 isPartition)
3412*2aa14b1aSNick Terrell {
3413*2aa14b1aSNick Terrell const U32 rleMaxLength = 25;
3414*2aa14b1aSNick Terrell BYTE* op = (BYTE*)dst;
3415*2aa14b1aSNick Terrell const BYTE* ip = (const BYTE*)src;
3416*2aa14b1aSNick Terrell size_t cSize;
3417*2aa14b1aSNick Terrell size_t cSeqsSize;
3418*2aa14b1aSNick Terrell
3419*2aa14b1aSNick Terrell /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
3420*2aa14b1aSNick Terrell repcodes_t const dRepOriginal = *dRep;
3421*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
3422*2aa14b1aSNick Terrell if (isPartition)
3423*2aa14b1aSNick Terrell ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
3424*2aa14b1aSNick Terrell
3425*2aa14b1aSNick Terrell RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
3426*2aa14b1aSNick Terrell cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
3427*2aa14b1aSNick Terrell &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
3428*2aa14b1aSNick Terrell &zc->appliedParams,
3429*2aa14b1aSNick Terrell op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
3430*2aa14b1aSNick Terrell srcSize,
3431*2aa14b1aSNick Terrell zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
3432*2aa14b1aSNick Terrell zc->bmi2);
3433*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
3434*2aa14b1aSNick Terrell
3435*2aa14b1aSNick Terrell if (!zc->isFirstBlock &&
3436*2aa14b1aSNick Terrell cSeqsSize < rleMaxLength &&
3437*2aa14b1aSNick Terrell ZSTD_isRLE((BYTE const*)src, srcSize)) {
3438*2aa14b1aSNick Terrell /* We don't want to emit our first block as a RLE even if it qualifies because
3439*2aa14b1aSNick Terrell * doing so will cause the decoder (cli only) to throw a "should consume all input error."
3440*2aa14b1aSNick Terrell * This is only an issue for zstd <= v1.4.3
3441*2aa14b1aSNick Terrell */
3442*2aa14b1aSNick Terrell cSeqsSize = 1;
3443*2aa14b1aSNick Terrell }
3444*2aa14b1aSNick Terrell
3445*2aa14b1aSNick Terrell if (zc->seqCollector.collectSequences) {
3446*2aa14b1aSNick Terrell ZSTD_copyBlockSequences(zc);
3447*2aa14b1aSNick Terrell ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
3448*2aa14b1aSNick Terrell return 0;
3449*2aa14b1aSNick Terrell }
3450*2aa14b1aSNick Terrell
3451*2aa14b1aSNick Terrell if (cSeqsSize == 0) {
3452*2aa14b1aSNick Terrell cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
3453*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "Nocompress block failed");
3454*2aa14b1aSNick Terrell DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
3455*2aa14b1aSNick Terrell *dRep = dRepOriginal; /* reset simulated decompression repcode history */
3456*2aa14b1aSNick Terrell } else if (cSeqsSize == 1) {
3457*2aa14b1aSNick Terrell cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
3458*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "RLE compress block failed");
3459*2aa14b1aSNick Terrell DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
3460*2aa14b1aSNick Terrell *dRep = dRepOriginal; /* reset simulated decompression repcode history */
3461*2aa14b1aSNick Terrell } else {
3462*2aa14b1aSNick Terrell ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
3463*2aa14b1aSNick Terrell writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
3464*2aa14b1aSNick Terrell cSize = ZSTD_blockHeaderSize + cSeqsSize;
3465*2aa14b1aSNick Terrell DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
3466*2aa14b1aSNick Terrell }
3467*2aa14b1aSNick Terrell
3468*2aa14b1aSNick Terrell if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
3469*2aa14b1aSNick Terrell zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
3470*2aa14b1aSNick Terrell
3471*2aa14b1aSNick Terrell return cSize;
3472*2aa14b1aSNick Terrell }
3473*2aa14b1aSNick Terrell
3474*2aa14b1aSNick Terrell /* Struct to keep track of where we are in our recursive calls. */
3475*2aa14b1aSNick Terrell typedef struct {
3476*2aa14b1aSNick Terrell U32* splitLocations; /* Array of split indices */
3477*2aa14b1aSNick Terrell size_t idx; /* The current index within splitLocations being worked on */
3478*2aa14b1aSNick Terrell } seqStoreSplits;
3479*2aa14b1aSNick Terrell
3480*2aa14b1aSNick Terrell #define MIN_SEQUENCES_BLOCK_SPLITTING 300
3481*2aa14b1aSNick Terrell
3482*2aa14b1aSNick Terrell /* Helper function to perform the recursive search for block splits.
3483*2aa14b1aSNick Terrell * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
3484*2aa14b1aSNick Terrell * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
3485*2aa14b1aSNick Terrell * we do not recurse.
3486*2aa14b1aSNick Terrell *
3487*2aa14b1aSNick Terrell * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
3488*2aa14b1aSNick Terrell * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
3489*2aa14b1aSNick Terrell * In practice, recursion depth usually doesn't go beyond 4.
3490*2aa14b1aSNick Terrell *
3491*2aa14b1aSNick Terrell * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
3492*2aa14b1aSNick Terrell * maximum of 128 KB, this value is actually impossible to reach.
3493*2aa14b1aSNick Terrell */
3494*2aa14b1aSNick Terrell static void
ZSTD_deriveBlockSplitsHelper(seqStoreSplits * splits,size_t startIdx,size_t endIdx,ZSTD_CCtx * zc,const seqStore_t * origSeqStore)3495*2aa14b1aSNick Terrell ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
3496*2aa14b1aSNick Terrell ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
3497*2aa14b1aSNick Terrell {
3498*2aa14b1aSNick Terrell seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
3499*2aa14b1aSNick Terrell seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
3500*2aa14b1aSNick Terrell seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
3501*2aa14b1aSNick Terrell size_t estimatedOriginalSize;
3502*2aa14b1aSNick Terrell size_t estimatedFirstHalfSize;
3503*2aa14b1aSNick Terrell size_t estimatedSecondHalfSize;
3504*2aa14b1aSNick Terrell size_t midIdx = (startIdx + endIdx)/2;
3505*2aa14b1aSNick Terrell
3506*2aa14b1aSNick Terrell if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
3507*2aa14b1aSNick Terrell DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
3508*2aa14b1aSNick Terrell return;
3509*2aa14b1aSNick Terrell }
3510*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
3511*2aa14b1aSNick Terrell ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
3512*2aa14b1aSNick Terrell ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
3513*2aa14b1aSNick Terrell ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
3514*2aa14b1aSNick Terrell estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
3515*2aa14b1aSNick Terrell estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
3516*2aa14b1aSNick Terrell estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
3517*2aa14b1aSNick Terrell DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
3518*2aa14b1aSNick Terrell estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
3519*2aa14b1aSNick Terrell if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
3520*2aa14b1aSNick Terrell return;
3521*2aa14b1aSNick Terrell }
3522*2aa14b1aSNick Terrell if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
3523*2aa14b1aSNick Terrell ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
3524*2aa14b1aSNick Terrell splits->splitLocations[splits->idx] = (U32)midIdx;
3525*2aa14b1aSNick Terrell splits->idx++;
3526*2aa14b1aSNick Terrell ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
3527*2aa14b1aSNick Terrell }
3528*2aa14b1aSNick Terrell }
3529*2aa14b1aSNick Terrell
3530*2aa14b1aSNick Terrell /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
3531*2aa14b1aSNick Terrell *
3532*2aa14b1aSNick Terrell * Returns the number of splits made (which equals the size of the partition table - 1).
3533*2aa14b1aSNick Terrell */
ZSTD_deriveBlockSplits(ZSTD_CCtx * zc,U32 partitions[],U32 nbSeq)3534*2aa14b1aSNick Terrell static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
3535*2aa14b1aSNick Terrell seqStoreSplits splits = {partitions, 0};
3536*2aa14b1aSNick Terrell if (nbSeq <= 4) {
3537*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
3538*2aa14b1aSNick Terrell /* Refuse to try and split anything with less than 4 sequences */
3539*2aa14b1aSNick Terrell return 0;
3540*2aa14b1aSNick Terrell }
3541*2aa14b1aSNick Terrell ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
3542*2aa14b1aSNick Terrell splits.splitLocations[splits.idx] = nbSeq;
3543*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
3544*2aa14b1aSNick Terrell return splits.idx;
3545*2aa14b1aSNick Terrell }
3546*2aa14b1aSNick Terrell
3547*2aa14b1aSNick Terrell /* ZSTD_compressBlock_splitBlock():
3548*2aa14b1aSNick Terrell * Attempts to split a given block into multiple blocks to improve compression ratio.
3549*2aa14b1aSNick Terrell *
3550*2aa14b1aSNick Terrell * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
3551*2aa14b1aSNick Terrell */
3552*2aa14b1aSNick Terrell static size_t
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx * zc,void * dst,size_t dstCapacity,const void * src,size_t blockSize,U32 lastBlock,U32 nbSeq)3553*2aa14b1aSNick Terrell ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
3554*2aa14b1aSNick Terrell const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
3555*2aa14b1aSNick Terrell {
3556*2aa14b1aSNick Terrell size_t cSize = 0;
3557*2aa14b1aSNick Terrell const BYTE* ip = (const BYTE*)src;
3558*2aa14b1aSNick Terrell BYTE* op = (BYTE*)dst;
3559*2aa14b1aSNick Terrell size_t i = 0;
3560*2aa14b1aSNick Terrell size_t srcBytesTotal = 0;
3561*2aa14b1aSNick Terrell U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
3562*2aa14b1aSNick Terrell seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
3563*2aa14b1aSNick Terrell seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
3564*2aa14b1aSNick Terrell size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
3565*2aa14b1aSNick Terrell
3566*2aa14b1aSNick Terrell /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
3567*2aa14b1aSNick Terrell * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
3568*2aa14b1aSNick Terrell * separate repcode histories that simulate repcode history on compression and decompression side,
3569*2aa14b1aSNick Terrell * and use the histories to determine whether we must replace a particular repcode with its raw offset.
3570*2aa14b1aSNick Terrell *
3571*2aa14b1aSNick Terrell * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
3572*2aa14b1aSNick Terrell * or RLE. This allows us to retrieve the offset value that an invalid repcode references within
3573*2aa14b1aSNick Terrell * a nocompress/RLE block.
3574*2aa14b1aSNick Terrell * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
3575*2aa14b1aSNick Terrell * the replacement offset value rather than the original repcode to update the repcode history.
3576*2aa14b1aSNick Terrell * dRep also will be the final repcode history sent to the next block.
3577*2aa14b1aSNick Terrell *
3578*2aa14b1aSNick Terrell * See ZSTD_seqStore_resolveOffCodes() for more details.
3579*2aa14b1aSNick Terrell */
3580*2aa14b1aSNick Terrell repcodes_t dRep;
3581*2aa14b1aSNick Terrell repcodes_t cRep;
3582*2aa14b1aSNick Terrell ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
3583*2aa14b1aSNick Terrell ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
3584*2aa14b1aSNick Terrell ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
3585*2aa14b1aSNick Terrell
3586*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
3587*2aa14b1aSNick Terrell (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
3588*2aa14b1aSNick Terrell (unsigned)zc->blockState.matchState.nextToUpdate);
3589*2aa14b1aSNick Terrell
3590*2aa14b1aSNick Terrell if (numSplits == 0) {
3591*2aa14b1aSNick Terrell size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
3592*2aa14b1aSNick Terrell &dRep, &cRep,
3593*2aa14b1aSNick Terrell op, dstCapacity,
3594*2aa14b1aSNick Terrell ip, blockSize,
3595*2aa14b1aSNick Terrell lastBlock, 0 /* isPartition */);
3596*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
3597*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
3598*2aa14b1aSNick Terrell assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
3599*2aa14b1aSNick Terrell return cSizeSingleBlock;
3600*2aa14b1aSNick Terrell }
3601*2aa14b1aSNick Terrell
3602*2aa14b1aSNick Terrell ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
3603*2aa14b1aSNick Terrell for (i = 0; i <= numSplits; ++i) {
3604*2aa14b1aSNick Terrell size_t srcBytes;
3605*2aa14b1aSNick Terrell size_t cSizeChunk;
3606*2aa14b1aSNick Terrell U32 const lastPartition = (i == numSplits);
3607*2aa14b1aSNick Terrell U32 lastBlockEntireSrc = 0;
3608*2aa14b1aSNick Terrell
3609*2aa14b1aSNick Terrell srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
3610*2aa14b1aSNick Terrell srcBytesTotal += srcBytes;
3611*2aa14b1aSNick Terrell if (lastPartition) {
3612*2aa14b1aSNick Terrell /* This is the final partition, need to account for possible last literals */
3613*2aa14b1aSNick Terrell srcBytes += blockSize - srcBytesTotal;
3614*2aa14b1aSNick Terrell lastBlockEntireSrc = lastBlock;
3615*2aa14b1aSNick Terrell } else {
3616*2aa14b1aSNick Terrell ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
3617*2aa14b1aSNick Terrell }
3618*2aa14b1aSNick Terrell
3619*2aa14b1aSNick Terrell cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
3620*2aa14b1aSNick Terrell &dRep, &cRep,
3621*2aa14b1aSNick Terrell op, dstCapacity,
3622*2aa14b1aSNick Terrell ip, srcBytes,
3623*2aa14b1aSNick Terrell lastBlockEntireSrc, 1 /* isPartition */);
3624*2aa14b1aSNick Terrell DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
3625*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
3626*2aa14b1aSNick Terrell
3627*2aa14b1aSNick Terrell ip += srcBytes;
3628*2aa14b1aSNick Terrell op += cSizeChunk;
3629*2aa14b1aSNick Terrell dstCapacity -= cSizeChunk;
3630*2aa14b1aSNick Terrell cSize += cSizeChunk;
3631*2aa14b1aSNick Terrell *currSeqStore = *nextSeqStore;
3632*2aa14b1aSNick Terrell assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
3633*2aa14b1aSNick Terrell }
3634*2aa14b1aSNick Terrell /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
3635*2aa14b1aSNick Terrell * for the next block.
3636*2aa14b1aSNick Terrell */
3637*2aa14b1aSNick Terrell ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
3638*2aa14b1aSNick Terrell return cSize;
3639*2aa14b1aSNick Terrell }
3640*2aa14b1aSNick Terrell
3641*2aa14b1aSNick Terrell static size_t
ZSTD_compressBlock_splitBlock(ZSTD_CCtx * zc,void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)3642*2aa14b1aSNick Terrell ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
3643*2aa14b1aSNick Terrell void* dst, size_t dstCapacity,
3644*2aa14b1aSNick Terrell const void* src, size_t srcSize, U32 lastBlock)
3645*2aa14b1aSNick Terrell {
3646*2aa14b1aSNick Terrell const BYTE* ip = (const BYTE*)src;
3647*2aa14b1aSNick Terrell BYTE* op = (BYTE*)dst;
3648*2aa14b1aSNick Terrell U32 nbSeq;
3649*2aa14b1aSNick Terrell size_t cSize;
3650*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
3651*2aa14b1aSNick Terrell assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);
3652*2aa14b1aSNick Terrell
3653*2aa14b1aSNick Terrell { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
3654*2aa14b1aSNick Terrell FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
3655*2aa14b1aSNick Terrell if (bss == ZSTDbss_noCompress) {
3656*2aa14b1aSNick Terrell if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
3657*2aa14b1aSNick Terrell zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
3658*2aa14b1aSNick Terrell cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
3659*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
3660*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
3661*2aa14b1aSNick Terrell return cSize;
3662*2aa14b1aSNick Terrell }
3663*2aa14b1aSNick Terrell nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
3664*2aa14b1aSNick Terrell }
3665*2aa14b1aSNick Terrell
3666*2aa14b1aSNick Terrell cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
3667*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
3668*2aa14b1aSNick Terrell return cSize;
3669*2aa14b1aSNick Terrell }
3670*2aa14b1aSNick Terrell
3671*2aa14b1aSNick Terrell static size_t
ZSTD_compressBlock_internal(ZSTD_CCtx * zc,void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 frame)3672*2aa14b1aSNick Terrell ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
3673e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
3674e0c1b49fSNick Terrell const void* src, size_t srcSize, U32 frame)
3675e0c1b49fSNick Terrell {
3676e0c1b49fSNick Terrell /* This the upper bound for the length of an rle block.
3677e0c1b49fSNick Terrell * This isn't the actual upper bound. Finding the real threshold
3678e0c1b49fSNick Terrell * needs further investigation.
3679e0c1b49fSNick Terrell */
3680e0c1b49fSNick Terrell const U32 rleMaxLength = 25;
3681e0c1b49fSNick Terrell size_t cSize;
3682e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*)src;
3683e0c1b49fSNick Terrell BYTE* op = (BYTE*)dst;
3684e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
3685e0c1b49fSNick Terrell (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
3686e0c1b49fSNick Terrell (unsigned)zc->blockState.matchState.nextToUpdate);
3687e0c1b49fSNick Terrell
3688e0c1b49fSNick Terrell { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
3689e0c1b49fSNick Terrell FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
3690e0c1b49fSNick Terrell if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
3691e0c1b49fSNick Terrell }
3692e0c1b49fSNick Terrell
3693e0c1b49fSNick Terrell if (zc->seqCollector.collectSequences) {
3694e0c1b49fSNick Terrell ZSTD_copyBlockSequences(zc);
3695*2aa14b1aSNick Terrell ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
3696e0c1b49fSNick Terrell return 0;
3697e0c1b49fSNick Terrell }
3698e0c1b49fSNick Terrell
3699e0c1b49fSNick Terrell /* encode sequences and literals */
3700*2aa14b1aSNick Terrell cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
3701e0c1b49fSNick Terrell &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
3702e0c1b49fSNick Terrell &zc->appliedParams,
3703e0c1b49fSNick Terrell dst, dstCapacity,
3704e0c1b49fSNick Terrell srcSize,
3705e0c1b49fSNick Terrell zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
3706e0c1b49fSNick Terrell zc->bmi2);
3707e0c1b49fSNick Terrell
3708e0c1b49fSNick Terrell if (frame &&
3709e0c1b49fSNick Terrell /* We don't want to emit our first block as a RLE even if it qualifies because
3710e0c1b49fSNick Terrell * doing so will cause the decoder (cli only) to throw a "should consume all input error."
3711e0c1b49fSNick Terrell * This is only an issue for zstd <= v1.4.3
3712e0c1b49fSNick Terrell */
3713e0c1b49fSNick Terrell !zc->isFirstBlock &&
3714e0c1b49fSNick Terrell cSize < rleMaxLength &&
3715e0c1b49fSNick Terrell ZSTD_isRLE(ip, srcSize))
3716e0c1b49fSNick Terrell {
3717e0c1b49fSNick Terrell cSize = 1;
3718e0c1b49fSNick Terrell op[0] = ip[0];
3719e0c1b49fSNick Terrell }
3720e0c1b49fSNick Terrell
3721e0c1b49fSNick Terrell out:
3722e0c1b49fSNick Terrell if (!ZSTD_isError(cSize) && cSize > 1) {
3723*2aa14b1aSNick Terrell ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
3724e0c1b49fSNick Terrell }
3725e0c1b49fSNick Terrell /* We check that dictionaries have offset codes available for the first
3726e0c1b49fSNick Terrell * block. After the first block, the offcode table might not have large
3727e0c1b49fSNick Terrell * enough codes to represent the offsets in the data.
3728e0c1b49fSNick Terrell */
3729e0c1b49fSNick Terrell if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
3730e0c1b49fSNick Terrell zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
3731e0c1b49fSNick Terrell
3732e0c1b49fSNick Terrell return cSize;
3733e0c1b49fSNick Terrell }
3734e0c1b49fSNick Terrell
/* ZSTD_compressBlock_targetCBlockSize_body():
 * Tries, in order : an RLE block, a superblock, and finally a raw (uncompressed) block.
 * `bss` is the status previously returned by ZSTD_buildSeqStore() for this input.
 *
 * Returns the size written into `dst`, or a ZSTD error code.
 */
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        /* The first block is never emitted as RLE, even if it qualifies, because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        int const emitRLE = !zc->isFirstBlock
                         && ZSTD_maybeRLE(&zc->seqStore)
                         && ZSTD_isRLE((BYTE const*)src, srcSize);
        if (emitRLE) {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }

        /* Attempt superblock compression.
         *
         * The compressed size produced by ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem, because even if we have
         * space now, taking an extra byte now could cause us to run out of space later
         * and violate ZSTD_compressBound().
         *
         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
         *
         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
         * uncompressed block in these cases:
         *   * cSize == 0: Return code for an uncompressed block.
         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
         *     output space.
         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
         *     emit an uncompressed block.
         */
        {   size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
            int const ranOutOfSpace = (cSize == ERROR(dstSize_tooSmall));
            if (!ranOutOfSpace) {
                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                    return cSize;
                }
            }
        }
    }

    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
    /* Superblock compression was not usable : fall back to a single raw block.
     * The decoder will be able to stream this block since it is uncompressed.
     */
    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
}
3789e0c1b49fSNick Terrell
/* ZSTD_compressBlock_targetCBlockSize():
 * Builds the sequence store for `src`, then delegates block emission to
 * ZSTD_compressBlock_targetCBlockSize_body().
 *
 * Returns the size written into `dst`, or a ZSTD error code.
 */
static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               U32 lastBlock)
{
    size_t const bss = ZSTD_buildSeqStore(zc, src, srcSize);
    size_t cSize;
    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");

    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");

    /* a previously valid offcode table is downgraded to "check" once a block has been produced */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) {
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
    }

    return cSize;
}
3809e0c1b49fSNick Terrell
/* ZSTD_overflowCorrectIfNeeded():
 * When the window reports that overflow correction is needed for [ip, iend),
 * applies the correction to the window, rescales the match state tables and
 * nextToUpdate by the same amount, and drops any dictionary reference.
 * Does nothing otherwise.
 */
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
    U32 const maxDist = (U32)1 << params->cParams.windowLog;

    if (!ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
        return;
    }

    {   U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);

        /* tables are flagged dirty for the duration of the index reduction */
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);

        /* shift nextToUpdate by the same correction, saturating at 0 */
        ms->nextToUpdate = (ms->nextToUpdate < correction) ? 0 : ms->nextToUpdate - correction;

        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
3833e0c1b49fSNick Terrell
3834e0c1b49fSNick Terrell /*! ZSTD_compress_frameChunk() :
3835e0c1b49fSNick Terrell * Compress a chunk of data into one or multiple blocks.
3836e0c1b49fSNick Terrell * All blocks will be terminated, all input will be consumed.
3837e0c1b49fSNick Terrell * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
3838e0c1b49fSNick Terrell * Frame is supposed already started (header already produced)
3839e0c1b49fSNick Terrell * @return : compressed size, or an error code
3840e0c1b49fSNick Terrell */
ZSTD_compress_frameChunk(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastFrameChunk)3841e0c1b49fSNick Terrell static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
3842e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
3843e0c1b49fSNick Terrell const void* src, size_t srcSize,
3844e0c1b49fSNick Terrell U32 lastFrameChunk)
3845e0c1b49fSNick Terrell {
3846e0c1b49fSNick Terrell size_t blockSize = cctx->blockSize;
3847e0c1b49fSNick Terrell size_t remaining = srcSize;
3848e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*)src;
3849e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*)dst;
3850e0c1b49fSNick Terrell BYTE* op = ostart;
3851e0c1b49fSNick Terrell U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
3852e0c1b49fSNick Terrell
3853e0c1b49fSNick Terrell assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
3854e0c1b49fSNick Terrell
3855e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
3856e0c1b49fSNick Terrell if (cctx->appliedParams.fParams.checksumFlag && srcSize)
3857e0c1b49fSNick Terrell xxh64_update(&cctx->xxhState, src, srcSize);
3858e0c1b49fSNick Terrell
3859e0c1b49fSNick Terrell while (remaining) {
3860e0c1b49fSNick Terrell ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
3861e0c1b49fSNick Terrell U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
3862e0c1b49fSNick Terrell
3863e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
3864e0c1b49fSNick Terrell dstSize_tooSmall,
3865e0c1b49fSNick Terrell "not enough space to store compressed block");
3866e0c1b49fSNick Terrell if (remaining < blockSize) blockSize = remaining;
3867e0c1b49fSNick Terrell
3868e0c1b49fSNick Terrell ZSTD_overflowCorrectIfNeeded(
3869e0c1b49fSNick Terrell ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
3870e0c1b49fSNick Terrell ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
3871*2aa14b1aSNick Terrell ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
3872e0c1b49fSNick Terrell
3873e0c1b49fSNick Terrell /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
3874e0c1b49fSNick Terrell if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
3875e0c1b49fSNick Terrell
3876e0c1b49fSNick Terrell { size_t cSize;
3877e0c1b49fSNick Terrell if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
3878e0c1b49fSNick Terrell cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
3879e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
3880e0c1b49fSNick Terrell assert(cSize > 0);
3881e0c1b49fSNick Terrell assert(cSize <= blockSize + ZSTD_blockHeaderSize);
3882*2aa14b1aSNick Terrell } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
3883*2aa14b1aSNick Terrell cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
3884*2aa14b1aSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
3885*2aa14b1aSNick Terrell assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
3886e0c1b49fSNick Terrell } else {
3887e0c1b49fSNick Terrell cSize = ZSTD_compressBlock_internal(cctx,
3888e0c1b49fSNick Terrell op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
3889e0c1b49fSNick Terrell ip, blockSize, 1 /* frame */);
3890e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
3891e0c1b49fSNick Terrell
3892e0c1b49fSNick Terrell if (cSize == 0) { /* block is not compressible */
3893e0c1b49fSNick Terrell cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
3894e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
3895e0c1b49fSNick Terrell } else {
3896e0c1b49fSNick Terrell U32 const cBlockHeader = cSize == 1 ?
3897e0c1b49fSNick Terrell lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
3898e0c1b49fSNick Terrell lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
3899e0c1b49fSNick Terrell MEM_writeLE24(op, cBlockHeader);
3900e0c1b49fSNick Terrell cSize += ZSTD_blockHeaderSize;
3901e0c1b49fSNick Terrell }
3902e0c1b49fSNick Terrell }
3903e0c1b49fSNick Terrell
3904e0c1b49fSNick Terrell
3905e0c1b49fSNick Terrell ip += blockSize;
3906e0c1b49fSNick Terrell assert(remaining >= blockSize);
3907e0c1b49fSNick Terrell remaining -= blockSize;
3908e0c1b49fSNick Terrell op += cSize;
3909e0c1b49fSNick Terrell assert(dstCapacity >= cSize);
3910e0c1b49fSNick Terrell dstCapacity -= cSize;
3911e0c1b49fSNick Terrell cctx->isFirstBlock = 0;
3912e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
3913e0c1b49fSNick Terrell (unsigned)cSize);
3914e0c1b49fSNick Terrell } }
3915e0c1b49fSNick Terrell
3916e0c1b49fSNick Terrell if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
3917e0c1b49fSNick Terrell return (size_t)(op-ostart);
3918e0c1b49fSNick Terrell }
3919e0c1b49fSNick Terrell
3920e0c1b49fSNick Terrell
/* ZSTD_writeFrameHeader() :
 * Serializes a frame header into `dst`, in this order :
 *  - the magic number (only when params->format == ZSTD_f_zstd1),
 *  - the frame header descriptor byte,
 *  - the window descriptor byte (omitted for single-segment frames),
 *  - the dictionary ID (0, 1, 2 or 4 bytes),
 *  - the frame content size (0, 1, 2, 4 or 8 bytes).
 * `dstCapacity` must be at least ZSTD_FRAMEHEADERSIZE_MAX, whatever the size
 * of the header actually produced.
 * @return : number of bytes written into `dst`, or an error code (dstSize_tooSmall).
 */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{
    BYTE* const out = (BYTE*)dst;
    U32 const hasChecksum = params->fParams.checksumFlag>0;
    U32 const windowSize = (U32)1 << params->cParams.windowLog;
    U32 const isSingleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE const windowDescriptor = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32 dictIDCode = 0;        /* 0-3 : selects a dictID field of 0, 1, 2 or 4 bytes */
    U32 contentSizeCode = 0;   /* 0-3 : selects a content size field of 0/1, 2, 4 or 8 bytes */
    BYTE descriptor;
    size_t written = 0;

    if (!params->fParams.noDictIDFlag) {
        dictIDCode = (U32)((dictID>0) + (dictID>=256) + (dictID>=65536));
    }
    if (params->fParams.contentSizeFlag) {
        contentSizeCode = (U32)((pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU));
    }
    /* the four fields occupy disjoint bits of the descriptor */
    descriptor = (BYTE)(dictIDCode | (hasChecksum<<2) | (isSingleSegment<<5) | (contentSizeCode<<6));

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDCode);

    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        written = 4;
    }

    out[written++] = descriptor;
    if (!isSingleSegment) {
        out[written++] = windowDescriptor;
    }

    /* dictionary ID field */
    if (dictIDCode == 1) {
        out[written] = (BYTE)(dictID);
        written += 1;
    } else if (dictIDCode == 2) {
        MEM_writeLE16(out+written, (U16)dictID);
        written += 2;
    } else if (dictIDCode == 3) {
        MEM_writeLE32(out+written, dictID);
        written += 4;
    } else {
        assert(dictIDCode == 0);   /* any other value is impossible */
    }

    /* frame content size field */
    if (contentSizeCode == 1) {
        /* 2-byte field stores the size with a bias of 256 */
        MEM_writeLE16(out+written, (U16)(pledgedSrcSize-256));
        written += 2;
    } else if (contentSizeCode == 2) {
        MEM_writeLE32(out+written, (U32)(pledgedSrcSize));
        written += 4;
    } else if (contentSizeCode == 3) {
        MEM_writeLE64(out+written, (U64)(pledgedSrcSize));
        written += 8;
    } else {
        assert(contentSizeCode == 0);   /* any other value is impossible */
        /* code 0 : the size is only present (1 byte) for single-segment frames */
        if (isSingleSegment) {
            out[written++] = (BYTE)(pledgedSrcSize);
        }
    }

    return written;
}
3968e0c1b49fSNick Terrell
3969e0c1b49fSNick Terrell /* ZSTD_writeSkippableFrame_advanced() :
3970e0c1b49fSNick Terrell * Writes out a skippable frame with the specified magic number variant (16 are supported),
3971e0c1b49fSNick Terrell * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
3972e0c1b49fSNick Terrell *
3973e0c1b49fSNick Terrell * Returns the total number of bytes written, or a ZSTD error code.
3974e0c1b49fSNick Terrell */
ZSTD_writeSkippableFrame(void * dst,size_t dstCapacity,const void * src,size_t srcSize,unsigned magicVariant)3975e0c1b49fSNick Terrell size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
3976e0c1b49fSNick Terrell const void* src, size_t srcSize, unsigned magicVariant) {
3977e0c1b49fSNick Terrell BYTE* op = (BYTE*)dst;
3978e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
3979e0c1b49fSNick Terrell dstSize_tooSmall, "Not enough room for skippable frame");
3980e0c1b49fSNick Terrell RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
3981e0c1b49fSNick Terrell RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
3982e0c1b49fSNick Terrell
3983e0c1b49fSNick Terrell MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
3984e0c1b49fSNick Terrell MEM_writeLE32(op+4, (U32)srcSize);
3985e0c1b49fSNick Terrell ZSTD_memcpy(op+8, src, srcSize);
3986e0c1b49fSNick Terrell return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
3987e0c1b49fSNick Terrell }
3988e0c1b49fSNick Terrell
3989e0c1b49fSNick Terrell /* ZSTD_writeLastEmptyBlock() :
3990e0c1b49fSNick Terrell * output an empty Block with end-of-frame mark to complete a frame
3991e0c1b49fSNick Terrell * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
3992e0c1b49fSNick Terrell * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
3993e0c1b49fSNick Terrell */
ZSTD_writeLastEmptyBlock(void * dst,size_t dstCapacity)3994e0c1b49fSNick Terrell size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
3995e0c1b49fSNick Terrell {
3996e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
3997e0c1b49fSNick Terrell "dst buf is too small to write frame trailer empty block.");
3998e0c1b49fSNick Terrell { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
3999e0c1b49fSNick Terrell MEM_writeLE24(dst, cBlockHeader24);
4000e0c1b49fSNick Terrell return ZSTD_blockHeaderSize;
4001e0c1b49fSNick Terrell }
4002e0c1b49fSNick Terrell }
4003e0c1b49fSNick Terrell
/* ZSTD_referenceExternalSequences() :
 * Points the context's external sequence store at the caller's `seq` array
 * (`nbSeq` entries; referenced, not copied) and rewinds its read position.
 * Only accepted while the context is in the ZSTDcs_init stage and long
 * distance matching is not enabled.
 * @return : 0, or an error code (stage_wrong, parameter_unsupported)
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
                    parameter_unsupported,
                    "incompatible with ldm");

    /* start reading from the very first sequence */
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.posInSequence = 0;

    /* the caller's array is entirely filled : size == capacity */
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.size = nbSeq;

    return 0;
}
4018e0c1b49fSNick Terrell
4019e0c1b49fSNick Terrell
/* ZSTD_compressContinue_internal() :
 * Compresses `src` into `dst`.
 *  - frame != 0 : frame mode. A frame header is written first if the context is
 *                 still in the ZSTDcs_init stage, then the input goes through
 *                 ZSTD_compress_frameChunk().
 *  - frame == 0 : block mode. Overflow correction is performed here, then the
 *                 input goes through ZSTD_compressBlock_internal().
 * An empty input produces no block : only the frame header (if any) is returned.
 * When a source size was pledged, consuming more than pledged is an error.
 * @return : number of bytes written into `dst` (frame header included), or an error code
 */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const matchState = &cctx->blockState.matchState;
    size_t headerSize = 0;
    size_t bodySize;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        headerSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                           cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(headerSize, "ZSTD_writeFrameHeader failed");
        assert(headerSize <= dstCapacity);
        /* the payload starts right after the header */
        dst = (char*)dst + headerSize;
        dstCapacity -= headerSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (srcSize == 0) return headerSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&matchState->window, src, srcSize, matchState->forceNonContiguous)) {
        /* window update reported 0 : restart table insertion from dictLimit */
        matchState->forceNonContiguous = 0;
        matchState->nextToUpdate = matchState->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            matchState, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    if (frame) {
        bodySize = ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk);
        FORWARD_IF_ERROR(bodySize, "%s", "ZSTD_compress_frameChunk failed");
    } else {
        bodySize = ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(bodySize, "%s", "ZSTD_compressBlock_internal failed");
    }

    /* progress accounting */
    cctx->consumedSrcSize += srcSize;
    cctx->producedCSize += (bodySize + headerSize);

    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        RETURN_ERROR_IF(
            cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize >= %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }

    return bodySize + headerSize;
}
4080e0c1b49fSNick Terrell
/* ZSTD_compressContinue() :
 * Frame-mode entry point : compresses `src` as a chunk of the current frame,
 * never flagged as the last chunk.
 * @return : number of bytes written into `dst`, or an error code
 */
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    U32 const frameMode = 1;
    U32 const isLastChunk = 0;

    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frameMode, isLastChunk);
}
4088e0c1b49fSNick Terrell
4089e0c1b49fSNick Terrell
ZSTD_getBlockSize(const ZSTD_CCtx * cctx)4090e0c1b49fSNick Terrell size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
4091e0c1b49fSNick Terrell {
4092e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
4093e0c1b49fSNick Terrell assert(!ZSTD_checkCParams(cParams));
4094e0c1b49fSNick Terrell return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
4095e0c1b49fSNick Terrell }
4096e0c1b49fSNick Terrell
/* ZSTD_compressBlock() :
 * Block-mode entry point : compresses `src` as a single block, without frame header.
 * `srcSize` must not exceed ZSTD_getBlockSize(cctx).
 * @return : number of bytes written into `dst`, or an error code
 *           (srcSize_wrong when the input is larger than a block)
 */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t maxBlockSize;

    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);

    maxBlockSize = ZSTD_getBlockSize(cctx);
    RETURN_ERROR_IF(srcSize > maxBlockSize, srcSize_wrong, "input is larger than a block");

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
4105e0c1b49fSNick Terrell
4106e0c1b49fSNick Terrell /*! ZSTD_loadDictionaryContent() :
4107e0c1b49fSNick Terrell * @return : 0, or an error code
4108e0c1b49fSNick Terrell */
ZSTD_loadDictionaryContent(ZSTD_matchState_t * ms,ldmState_t * ls,ZSTD_cwksp * ws,ZSTD_CCtx_params const * params,const void * src,size_t srcSize,ZSTD_dictTableLoadMethod_e dtlm)4109e0c1b49fSNick Terrell static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
4110e0c1b49fSNick Terrell ldmState_t* ls,
4111e0c1b49fSNick Terrell ZSTD_cwksp* ws,
4112e0c1b49fSNick Terrell ZSTD_CCtx_params const* params,
4113e0c1b49fSNick Terrell const void* src, size_t srcSize,
4114e0c1b49fSNick Terrell ZSTD_dictTableLoadMethod_e dtlm)
4115e0c1b49fSNick Terrell {
4116e0c1b49fSNick Terrell const BYTE* ip = (const BYTE*) src;
4117e0c1b49fSNick Terrell const BYTE* const iend = ip + srcSize;
4118*2aa14b1aSNick Terrell int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
4119e0c1b49fSNick Terrell
4120e0c1b49fSNick Terrell /* Assert that we the ms params match the params we're being given */
4121e0c1b49fSNick Terrell ZSTD_assertEqualCParams(params->cParams, ms->cParams);
4122e0c1b49fSNick Terrell
4123*2aa14b1aSNick Terrell if (srcSize > ZSTD_CHUNKSIZE_MAX) {
4124*2aa14b1aSNick Terrell /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
4125*2aa14b1aSNick Terrell * Dictionaries right at the edge will immediately trigger overflow
4126*2aa14b1aSNick Terrell * correction, but I don't want to insert extra constraints here.
4127*2aa14b1aSNick Terrell */
4128*2aa14b1aSNick Terrell U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
4129*2aa14b1aSNick Terrell /* We must have cleared our windows when our source is this large. */
4130*2aa14b1aSNick Terrell assert(ZSTD_window_isEmpty(ms->window));
4131*2aa14b1aSNick Terrell if (loadLdmDict)
4132*2aa14b1aSNick Terrell assert(ZSTD_window_isEmpty(ls->window));
4133*2aa14b1aSNick Terrell /* If the dictionary is too large, only load the suffix of the dictionary. */
4134*2aa14b1aSNick Terrell if (srcSize > maxDictSize) {
4135*2aa14b1aSNick Terrell ip = iend - maxDictSize;
4136*2aa14b1aSNick Terrell src = ip;
4137*2aa14b1aSNick Terrell srcSize = maxDictSize;
4138*2aa14b1aSNick Terrell }
4139*2aa14b1aSNick Terrell }
4140*2aa14b1aSNick Terrell
4141*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
4142*2aa14b1aSNick Terrell ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
4143*2aa14b1aSNick Terrell ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
4144*2aa14b1aSNick Terrell ms->forceNonContiguous = params->deterministicRefPrefix;
4145*2aa14b1aSNick Terrell
4146*2aa14b1aSNick Terrell if (loadLdmDict) {
4147*2aa14b1aSNick Terrell ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
4148*2aa14b1aSNick Terrell ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
4149*2aa14b1aSNick Terrell }
4150*2aa14b1aSNick Terrell
4151e0c1b49fSNick Terrell if (srcSize <= HASH_READ_SIZE) return 0;
4152e0c1b49fSNick Terrell
4153*2aa14b1aSNick Terrell ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
4154e0c1b49fSNick Terrell
4155*2aa14b1aSNick Terrell if (loadLdmDict)
4156*2aa14b1aSNick Terrell ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams);
4157e0c1b49fSNick Terrell
4158e0c1b49fSNick Terrell switch(params->cParams.strategy)
4159e0c1b49fSNick Terrell {
4160e0c1b49fSNick Terrell case ZSTD_fast:
4161*2aa14b1aSNick Terrell ZSTD_fillHashTable(ms, iend, dtlm);
4162e0c1b49fSNick Terrell break;
4163e0c1b49fSNick Terrell case ZSTD_dfast:
4164*2aa14b1aSNick Terrell ZSTD_fillDoubleHashTable(ms, iend, dtlm);
4165e0c1b49fSNick Terrell break;
4166e0c1b49fSNick Terrell
4167e0c1b49fSNick Terrell case ZSTD_greedy:
4168e0c1b49fSNick Terrell case ZSTD_lazy:
4169e0c1b49fSNick Terrell case ZSTD_lazy2:
4170*2aa14b1aSNick Terrell assert(srcSize >= HASH_READ_SIZE);
4171*2aa14b1aSNick Terrell if (ms->dedicatedDictSearch) {
4172*2aa14b1aSNick Terrell assert(ms->chainTable != NULL);
4173*2aa14b1aSNick Terrell ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
4174*2aa14b1aSNick Terrell } else {
4175*2aa14b1aSNick Terrell assert(params->useRowMatchFinder != ZSTD_ps_auto);
4176*2aa14b1aSNick Terrell if (params->useRowMatchFinder == ZSTD_ps_enable) {
4177*2aa14b1aSNick Terrell size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
4178*2aa14b1aSNick Terrell ZSTD_memset(ms->tagTable, 0, tagTableSize);
4179*2aa14b1aSNick Terrell ZSTD_row_update(ms, iend-HASH_READ_SIZE);
4180*2aa14b1aSNick Terrell DEBUGLOG(4, "Using row-based hash table for lazy dict");
4181*2aa14b1aSNick Terrell } else {
4182*2aa14b1aSNick Terrell ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
4183*2aa14b1aSNick Terrell DEBUGLOG(4, "Using chain-based hash table for lazy dict");
4184*2aa14b1aSNick Terrell }
4185e0c1b49fSNick Terrell }
4186e0c1b49fSNick Terrell break;
4187e0c1b49fSNick Terrell
4188e0c1b49fSNick Terrell case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
4189e0c1b49fSNick Terrell case ZSTD_btopt:
4190e0c1b49fSNick Terrell case ZSTD_btultra:
4191e0c1b49fSNick Terrell case ZSTD_btultra2:
4192*2aa14b1aSNick Terrell assert(srcSize >= HASH_READ_SIZE);
4193*2aa14b1aSNick Terrell ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
4194e0c1b49fSNick Terrell break;
4195e0c1b49fSNick Terrell
4196e0c1b49fSNick Terrell default:
4197e0c1b49fSNick Terrell assert(0); /* not possible : not a valid strategy id */
4198e0c1b49fSNick Terrell }
4199e0c1b49fSNick Terrell
4200e0c1b49fSNick Terrell ms->nextToUpdate = (U32)(iend - ms->window.base);
4201e0c1b49fSNick Terrell return 0;
4202e0c1b49fSNick Terrell }
4203e0c1b49fSNick Terrell
4204e0c1b49fSNick Terrell
4205e0c1b49fSNick Terrell /* Dictionaries that assign zero probability to symbols that show up causes problems
4206e0c1b49fSNick Terrell * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
4207e0c1b49fSNick Terrell * and only dictionaries with 100% valid symbols can be assumed valid.
4208e0c1b49fSNick Terrell */
ZSTD_dictNCountRepeat(short * normalizedCounter,unsigned dictMaxSymbolValue,unsigned maxSymbolValue)4209e0c1b49fSNick Terrell static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
4210e0c1b49fSNick Terrell {
4211e0c1b49fSNick Terrell U32 s;
4212e0c1b49fSNick Terrell if (dictMaxSymbolValue < maxSymbolValue) {
4213e0c1b49fSNick Terrell return FSE_repeat_check;
4214e0c1b49fSNick Terrell }
4215e0c1b49fSNick Terrell for (s = 0; s <= maxSymbolValue; ++s) {
4216e0c1b49fSNick Terrell if (normalizedCounter[s] == 0) {
4217e0c1b49fSNick Terrell return FSE_repeat_check;
4218e0c1b49fSNick Terrell }
4219e0c1b49fSNick Terrell }
4220e0c1b49fSNick Terrell return FSE_repeat_valid;
4221e0c1b49fSNick Terrell }
4222e0c1b49fSNick Terrell
/*! ZSTD_loadCEntropy() :
 *  Parses the entropy section of a zstd-format dictionary and loads it into `bs` :
 *  the Huffman literals table, the offset / match-length / literal-length FSE
 *  tables, and the 3 initial repeat offsets.
 *  `dict` must point at the start of the dictionary : its first 8 bytes
 *  (magic number and dictionary ID) are skipped, not validated.
 *  `workspace` is handed to FSE_buildCTable_wksp() with a size of HUF_WORKSPACE_SIZE.
 *  @return : offset within `dict` at which the dictionary content starts,
 *            or an error code (dictionary_corrupted)
 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;

    /* The 8-byte prefix must exist, otherwise the remaining-size computations
     * below would go negative and turn into huge lengths. */
    RETURN_ERROR_IF(dictSize < 8, dictionary_corrupted, "");
    dictPtr += 8;   /* skip magic num and dict ID */
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            (size_t)(dictEnd-dictPtr), &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
        dictPtr += litlengthHeaderSize;
    }

    /* 3 repeat offsets of 4 bytes each. Compare sizes rather than forming
     * `dictPtr+12`, which could point beyond the end of the buffer. */
    RETURN_ERROR_IF((size_t)(dictEnd - dictPtr) < 12, dictionary_corrupted, "");
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));

        /* All repCodes must be <= dictContentSize and != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
    }   }   }

    return (size_t)(dictPtr - (const BYTE*)dict);
}
4314e0c1b49fSNick Terrell
4315e0c1b49fSNick Terrell /* Dictionary format :
4316e0c1b49fSNick Terrell * See :
4317e0c1b49fSNick Terrell * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
4318e0c1b49fSNick Terrell */
4319e0c1b49fSNick Terrell /*! ZSTD_loadZstdDictionary() :
4320e0c1b49fSNick Terrell * @return : dictID, or an error code
4321e0c1b49fSNick Terrell * assumptions : magic number supposed already checked
4322e0c1b49fSNick Terrell * dictSize supposed >= 8
4323e0c1b49fSNick Terrell */
ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t * bs,ZSTD_matchState_t * ms,ZSTD_cwksp * ws,ZSTD_CCtx_params const * params,const void * dict,size_t dictSize,ZSTD_dictTableLoadMethod_e dtlm,void * workspace)4324e0c1b49fSNick Terrell static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
4325e0c1b49fSNick Terrell ZSTD_matchState_t* ms,
4326e0c1b49fSNick Terrell ZSTD_cwksp* ws,
4327e0c1b49fSNick Terrell ZSTD_CCtx_params const* params,
4328e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4329e0c1b49fSNick Terrell ZSTD_dictTableLoadMethod_e dtlm,
4330e0c1b49fSNick Terrell void* workspace)
4331e0c1b49fSNick Terrell {
4332e0c1b49fSNick Terrell const BYTE* dictPtr = (const BYTE*)dict;
4333e0c1b49fSNick Terrell const BYTE* const dictEnd = dictPtr + dictSize;
4334e0c1b49fSNick Terrell size_t dictID;
4335e0c1b49fSNick Terrell size_t eSize;
4336e0c1b49fSNick Terrell ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
4337e0c1b49fSNick Terrell assert(dictSize >= 8);
4338e0c1b49fSNick Terrell assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
4339e0c1b49fSNick Terrell
4340e0c1b49fSNick Terrell dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
4341e0c1b49fSNick Terrell eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
4342e0c1b49fSNick Terrell FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
4343e0c1b49fSNick Terrell dictPtr += eSize;
4344e0c1b49fSNick Terrell
4345e0c1b49fSNick Terrell {
4346e0c1b49fSNick Terrell size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
4347e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
4348e0c1b49fSNick Terrell ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
4349e0c1b49fSNick Terrell }
4350e0c1b49fSNick Terrell return dictID;
4351e0c1b49fSNick Terrell }
4352e0c1b49fSNick Terrell
4353e0c1b49fSNick Terrell /* ZSTD_compress_insertDictionary() :
4354e0c1b49fSNick Terrell * @return : dictID, or an error code */
4355e0c1b49fSNick Terrell static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t * bs,ZSTD_matchState_t * ms,ldmState_t * ls,ZSTD_cwksp * ws,const ZSTD_CCtx_params * params,const void * dict,size_t dictSize,ZSTD_dictContentType_e dictContentType,ZSTD_dictTableLoadMethod_e dtlm,void * workspace)4356e0c1b49fSNick Terrell ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
4357e0c1b49fSNick Terrell ZSTD_matchState_t* ms,
4358e0c1b49fSNick Terrell ldmState_t* ls,
4359e0c1b49fSNick Terrell ZSTD_cwksp* ws,
4360e0c1b49fSNick Terrell const ZSTD_CCtx_params* params,
4361e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4362e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
4363e0c1b49fSNick Terrell ZSTD_dictTableLoadMethod_e dtlm,
4364e0c1b49fSNick Terrell void* workspace)
4365e0c1b49fSNick Terrell {
4366e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
4367e0c1b49fSNick Terrell if ((dict==NULL) || (dictSize<8)) {
4368e0c1b49fSNick Terrell RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
4369e0c1b49fSNick Terrell return 0;
4370e0c1b49fSNick Terrell }
4371e0c1b49fSNick Terrell
4372e0c1b49fSNick Terrell ZSTD_reset_compressedBlockState(bs);
4373e0c1b49fSNick Terrell
4374e0c1b49fSNick Terrell /* dict restricted modes */
4375e0c1b49fSNick Terrell if (dictContentType == ZSTD_dct_rawContent)
4376e0c1b49fSNick Terrell return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
4377e0c1b49fSNick Terrell
4378e0c1b49fSNick Terrell if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
4379e0c1b49fSNick Terrell if (dictContentType == ZSTD_dct_auto) {
4380e0c1b49fSNick Terrell DEBUGLOG(4, "raw content dictionary detected");
4381e0c1b49fSNick Terrell return ZSTD_loadDictionaryContent(
4382e0c1b49fSNick Terrell ms, ls, ws, params, dict, dictSize, dtlm);
4383e0c1b49fSNick Terrell }
4384e0c1b49fSNick Terrell RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
4385e0c1b49fSNick Terrell assert(0); /* impossible */
4386e0c1b49fSNick Terrell }
4387e0c1b49fSNick Terrell
4388e0c1b49fSNick Terrell /* dict as full zstd dictionary */
4389e0c1b49fSNick Terrell return ZSTD_loadZstdDictionary(
4390e0c1b49fSNick Terrell bs, ms, ws, params, dict, dictSize, dtlm, workspace);
4391e0c1b49fSNick Terrell }
4392e0c1b49fSNick Terrell
/* Thresholds consulted by ZSTD_compressBegin_internal() when a cdict is provided :
 * the cctx is reset directly from the cdict when pledgedSrcSize is below the cutoff,
 * or below dictContentSize times the multiplier (other conditions also apply there). */
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF      (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
4395e0c1b49fSNick Terrell
4396e0c1b49fSNick Terrell /*! ZSTD_compressBegin_internal() :
4397e0c1b49fSNick Terrell * @return : 0, or an error code */
ZSTD_compressBegin_internal(ZSTD_CCtx * cctx,const void * dict,size_t dictSize,ZSTD_dictContentType_e dictContentType,ZSTD_dictTableLoadMethod_e dtlm,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)4398e0c1b49fSNick Terrell static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
4399e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4400e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
4401e0c1b49fSNick Terrell ZSTD_dictTableLoadMethod_e dtlm,
4402e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
4403e0c1b49fSNick Terrell const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
4404e0c1b49fSNick Terrell ZSTD_buffered_policy_e zbuff)
4405e0c1b49fSNick Terrell {
4406*2aa14b1aSNick Terrell size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
4407e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
4408e0c1b49fSNick Terrell /* params are supposed to be fully validated at this point */
4409e0c1b49fSNick Terrell assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
4410e0c1b49fSNick Terrell assert(!((dict) && (cdict))); /* either dict or cdict, not both */
4411e0c1b49fSNick Terrell if ( (cdict)
4412e0c1b49fSNick Terrell && (cdict->dictContentSize > 0)
4413e0c1b49fSNick Terrell && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
4414e0c1b49fSNick Terrell || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
4415e0c1b49fSNick Terrell || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
4416e0c1b49fSNick Terrell || cdict->compressionLevel == 0)
4417e0c1b49fSNick Terrell && (params->attachDictPref != ZSTD_dictForceLoad) ) {
4418e0c1b49fSNick Terrell return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
4419e0c1b49fSNick Terrell }
4420e0c1b49fSNick Terrell
4421*2aa14b1aSNick Terrell FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
4422*2aa14b1aSNick Terrell dictContentSize,
4423e0c1b49fSNick Terrell ZSTDcrp_makeClean, zbuff) , "");
4424e0c1b49fSNick Terrell { size_t const dictID = cdict ?
4425e0c1b49fSNick Terrell ZSTD_compress_insertDictionary(
4426e0c1b49fSNick Terrell cctx->blockState.prevCBlock, &cctx->blockState.matchState,
4427e0c1b49fSNick Terrell &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
4428e0c1b49fSNick Terrell cdict->dictContentSize, cdict->dictContentType, dtlm,
4429e0c1b49fSNick Terrell cctx->entropyWorkspace)
4430e0c1b49fSNick Terrell : ZSTD_compress_insertDictionary(
4431e0c1b49fSNick Terrell cctx->blockState.prevCBlock, &cctx->blockState.matchState,
4432e0c1b49fSNick Terrell &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
4433e0c1b49fSNick Terrell dictContentType, dtlm, cctx->entropyWorkspace);
4434e0c1b49fSNick Terrell FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
4435e0c1b49fSNick Terrell assert(dictID <= UINT_MAX);
4436e0c1b49fSNick Terrell cctx->dictID = (U32)dictID;
4437*2aa14b1aSNick Terrell cctx->dictContentSize = dictContentSize;
4438e0c1b49fSNick Terrell }
4439e0c1b49fSNick Terrell return 0;
4440e0c1b49fSNick Terrell }
4441e0c1b49fSNick Terrell
/*! ZSTD_compressBegin_advanced_internal() :
 *  Checks params->cParams with ZSTD_checkCParams(), then forwards every argument
 *  to ZSTD_compressBegin_internal() with the ZSTDb_not_buffered policy.
 * @return : result of ZSTD_compressBegin_internal(), or the cParams check error */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);

    /* compression parameters verification and optimization */
    {   size_t const cParamsStatus = ZSTD_checkCParams(params->cParams);
        FORWARD_IF_ERROR(cParamsStatus, "");
    }

    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
4459e0c1b49fSNick Terrell
4460e0c1b49fSNick Terrell /*! ZSTD_compressBegin_advanced() :
4461e0c1b49fSNick Terrell * @return : 0, or an error code */
ZSTD_compressBegin_advanced(ZSTD_CCtx * cctx,const void * dict,size_t dictSize,ZSTD_parameters params,unsigned long long pledgedSrcSize)4462e0c1b49fSNick Terrell size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
4463e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4464e0c1b49fSNick Terrell ZSTD_parameters params, unsigned long long pledgedSrcSize)
4465e0c1b49fSNick Terrell {
4466e0c1b49fSNick Terrell ZSTD_CCtx_params cctxParams;
4467e0c1b49fSNick Terrell ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL);
4468e0c1b49fSNick Terrell return ZSTD_compressBegin_advanced_internal(cctx,
4469e0c1b49fSNick Terrell dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
4470e0c1b49fSNick Terrell NULL /*cdict*/,
4471e0c1b49fSNick Terrell &cctxParams, pledgedSrcSize);
4472e0c1b49fSNick Terrell }
4473e0c1b49fSNick Terrell
/*! ZSTD_compressBegin_usingDict() :
 *  Derives parameters from @compressionLevel and @dictSize (source size unknown),
 *  then begins compression with @dict handled as ZSTD_dct_auto / ZSTD_dtlm_fast.
 *  A @compressionLevel of 0 is recorded as ZSTD_CLEVEL_DEFAULT in the cctx params.
 * @return : result of ZSTD_compressBegin_internal() */
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;

    /* `params` is only needed to seed cctxParams : keep it in its own scope */
    {   int const recordedLevel = (compressionLevel != 0) ? compressionLevel : ZSTD_CLEVEL_DEFAULT;
        ZSTD_parameters const params = ZSTD_getParams_internal(
                compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, recordedLevel);
    }

    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                       NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN,
                                       ZSTDb_not_buffered);
}
4485e0c1b49fSNick Terrell
/*! ZSTD_compressBegin() :
 *  Dictionary-less variant : forwards to ZSTD_compressBegin_usingDict()
 *  with a NULL dictionary of size 0.
 * @return : result of ZSTD_compressBegin_usingDict() */
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    const void* const noDict = NULL;
    size_t const noDictSize = 0;
    return ZSTD_compressBegin_usingDict(cctx, noDict, noDictSize, compressionLevel);
}
4490e0c1b49fSNick Terrell
4491e0c1b49fSNick Terrell
4492e0c1b49fSNick Terrell /*! ZSTD_writeEpilogue() :
4493e0c1b49fSNick Terrell * Ends a frame.
4494e0c1b49fSNick Terrell * @return : nb of bytes written into dst (or an error code) */
ZSTD_writeEpilogue(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity)4495e0c1b49fSNick Terrell static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
4496e0c1b49fSNick Terrell {
4497e0c1b49fSNick Terrell BYTE* const ostart = (BYTE*)dst;
4498e0c1b49fSNick Terrell BYTE* op = ostart;
4499e0c1b49fSNick Terrell size_t fhSize = 0;
4500e0c1b49fSNick Terrell
4501e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_writeEpilogue");
4502e0c1b49fSNick Terrell RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
4503e0c1b49fSNick Terrell
4504e0c1b49fSNick Terrell /* special case : empty frame */
4505e0c1b49fSNick Terrell if (cctx->stage == ZSTDcs_init) {
4506e0c1b49fSNick Terrell fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
4507e0c1b49fSNick Terrell FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
4508e0c1b49fSNick Terrell dstCapacity -= fhSize;
4509e0c1b49fSNick Terrell op += fhSize;
4510e0c1b49fSNick Terrell cctx->stage = ZSTDcs_ongoing;
4511e0c1b49fSNick Terrell }
4512e0c1b49fSNick Terrell
4513e0c1b49fSNick Terrell if (cctx->stage != ZSTDcs_ending) {
4514e0c1b49fSNick Terrell /* write one last empty block, make it the "last" block */
4515e0c1b49fSNick Terrell U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
4516e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
4517e0c1b49fSNick Terrell MEM_writeLE32(op, cBlockHeader24);
4518e0c1b49fSNick Terrell op += ZSTD_blockHeaderSize;
4519e0c1b49fSNick Terrell dstCapacity -= ZSTD_blockHeaderSize;
4520e0c1b49fSNick Terrell }
4521e0c1b49fSNick Terrell
4522e0c1b49fSNick Terrell if (cctx->appliedParams.fParams.checksumFlag) {
4523e0c1b49fSNick Terrell U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
4524e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
4525e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
4526e0c1b49fSNick Terrell MEM_writeLE32(op, checksum);
4527e0c1b49fSNick Terrell op += 4;
4528e0c1b49fSNick Terrell }
4529e0c1b49fSNick Terrell
4530e0c1b49fSNick Terrell cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
4531e0c1b49fSNick Terrell return op-ostart;
4532e0c1b49fSNick Terrell }
4533e0c1b49fSNick Terrell
/*! ZSTD_CCtx_trace() :
 *  Intentionally empty in this build : both arguments are only referenced
 *  to avoid unused-parameter warnings. */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
    (void)extraCSize;
    (void)cctx;
}
4539e0c1b49fSNick Terrell
/*! ZSTD_compressEnd() :
 *  Compresses @src as the last chunk of the frame, then appends the epilogue
 *  right after the compressed data.
 *  When a source size was pledged (pledgedSrcSizePlusOne != 0), it is checked
 *  against the amount of input actually consumed.
 * @return : total nb of bytes written into dst (last chunk + epilogue),
 *           or an error code (srcSize_wrong on pledged size mismatch) */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");

    {   char* const epilogueStart = (char*)dst + cSize;
        size_t const epilogueCapacity = dstCapacity - cSize;
        size_t const endResult = ZSTD_writeEpilogue(cctx, epilogueStart, epilogueCapacity);
        FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");

        /* a frame advertising its content size must have a pledged size */
        assert(!cctx->appliedParams.fParams.contentSizeFlag || cctx->pledgedSrcSizePlusOne != 0);

        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            DEBUGLOG(4, "end of frame : controlling src size");
            RETURN_ERROR_IF(
                cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize = %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }

        ZSTD_CCtx_trace(cctx, endResult);
        return cSize + endResult;
    }
}
4565e0c1b49fSNick Terrell
/*! ZSTD_compress_advanced() :
 *  Validates params.cParams, stores the resulting cctx params
 *  (initialized with ZSTD_NO_CLEVEL) into cctx->simpleApiParams,
 *  then compresses through ZSTD_compress_advanced_internal().
 * @return : result of ZSTD_compress_advanced_internal(), or the cParams check error */
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    ZSTD_CCtx_params* const apiParams = &cctx->simpleApiParams;

    DEBUGLOG(4, "ZSTD_compress_advanced");
    {   size_t const cParamsStatus = ZSTD_checkCParams(params.cParams);
        FORWARD_IF_ERROR(cParamsStatus, "");
    }
    ZSTD_CCtxParams_init_internal(apiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           apiParams);
}
4581e0c1b49fSNick Terrell
4582e0c1b49fSNick Terrell /* Internal */
ZSTD_compress_advanced_internal(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const void * dict,size_t dictSize,const ZSTD_CCtx_params * params)4583e0c1b49fSNick Terrell size_t ZSTD_compress_advanced_internal(
4584e0c1b49fSNick Terrell ZSTD_CCtx* cctx,
4585e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
4586e0c1b49fSNick Terrell const void* src, size_t srcSize,
4587e0c1b49fSNick Terrell const void* dict,size_t dictSize,
4588e0c1b49fSNick Terrell const ZSTD_CCtx_params* params)
4589e0c1b49fSNick Terrell {
4590e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
4591e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
4592e0c1b49fSNick Terrell dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
4593e0c1b49fSNick Terrell params, srcSize, ZSTDb_not_buffered) , "");
4594e0c1b49fSNick Terrell return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
4595e0c1b49fSNick Terrell }
4596e0c1b49fSNick Terrell
/*! ZSTD_compress_usingDict() :
 *  Derives parameters from @compressionLevel, @srcSize and the dictionary size
 *  (counted as 0 when @dict is NULL), stores them into cctx->simpleApiParams,
 *  then compresses through ZSTD_compress_advanced_internal().
 *  A @compressionLevel of 0 is recorded as ZSTD_CLEVEL_DEFAULT.
 * @return : result of ZSTD_compress_advanced_internal() */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    /* `params` is only needed to seed simpleApiParams : keep it in its own scope */
    {   size_t const effectiveDictSize = dict ? dictSize : 0;
        int const recordedLevel = (compressionLevel != 0) ? compressionLevel : ZSTD_CLEVEL_DEFAULT;
        ZSTD_parameters const params = ZSTD_getParams_internal(
                compressionLevel, srcSize, effectiveDictSize, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, recordedLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}
4611e0c1b49fSNick Terrell
/*! ZSTD_compressCCtx() :
 *  Dictionary-less variant of ZSTD_compress_usingDict() : forwards with a NULL
 *  dictionary of size 0. @cctx must not be NULL (asserted).
 * @return : result of ZSTD_compress_usingDict() */
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    const void* const noDict = NULL;
    size_t const noDictSize = 0;

    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx,
                                   dst, dstCapacity,
                                   src, srcSize,
                                   noDict, noDictSize,
                                   compressionLevel);
}
4621e0c1b49fSNick Terrell
ZSTD_compress(void * dst,size_t dstCapacity,const void * src,size_t srcSize,int compressionLevel)4622e0c1b49fSNick Terrell size_t ZSTD_compress(void* dst, size_t dstCapacity,
4623e0c1b49fSNick Terrell const void* src, size_t srcSize,
4624e0c1b49fSNick Terrell int compressionLevel)
4625e0c1b49fSNick Terrell {
4626e0c1b49fSNick Terrell size_t result;
4627e0c1b49fSNick Terrell ZSTD_CCtx* cctx = ZSTD_createCCtx();
4628e0c1b49fSNick Terrell RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
4629e0c1b49fSNick Terrell result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
4630e0c1b49fSNick Terrell ZSTD_freeCCtx(cctx);
4631e0c1b49fSNick Terrell return result;
4632e0c1b49fSNick Terrell }
4633e0c1b49fSNick Terrell
4634e0c1b49fSNick Terrell
4635e0c1b49fSNick Terrell /* ===== Dictionary API ===== */
4636e0c1b49fSNick Terrell
4637e0c1b49fSNick Terrell /*! ZSTD_estimateCDictSize_advanced() :
4638e0c1b49fSNick Terrell * Estimate amount of memory that will be needed to create a dictionary with following arguments */
ZSTD_estimateCDictSize_advanced(size_t dictSize,ZSTD_compressionParameters cParams,ZSTD_dictLoadMethod_e dictLoadMethod)4639e0c1b49fSNick Terrell size_t ZSTD_estimateCDictSize_advanced(
4640e0c1b49fSNick Terrell size_t dictSize, ZSTD_compressionParameters cParams,
4641e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod)
4642e0c1b49fSNick Terrell {
4643e0c1b49fSNick Terrell DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
4644e0c1b49fSNick Terrell return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
4645e0c1b49fSNick Terrell + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
4646*2aa14b1aSNick Terrell /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
4647*2aa14b1aSNick Terrell * in case we are using DDS with row-hash. */
4648*2aa14b1aSNick Terrell + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
4649*2aa14b1aSNick Terrell /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
4650e0c1b49fSNick Terrell + (dictLoadMethod == ZSTD_dlm_byRef ? 0
4651e0c1b49fSNick Terrell : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
4652e0c1b49fSNick Terrell }
4653e0c1b49fSNick Terrell
ZSTD_estimateCDictSize(size_t dictSize,int compressionLevel)4654e0c1b49fSNick Terrell size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
4655e0c1b49fSNick Terrell {
4656e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
4657e0c1b49fSNick Terrell return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
4658e0c1b49fSNick Terrell }
4659e0c1b49fSNick Terrell
ZSTD_sizeof_CDict(const ZSTD_CDict * cdict)4660e0c1b49fSNick Terrell size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
4661e0c1b49fSNick Terrell {
4662e0c1b49fSNick Terrell if (cdict==NULL) return 0; /* support sizeof on NULL */
4663e0c1b49fSNick Terrell DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
4664e0c1b49fSNick Terrell /* cdict may be in the workspace */
4665e0c1b49fSNick Terrell return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
4666e0c1b49fSNick Terrell + ZSTD_cwksp_sizeof(&cdict->workspace);
4667e0c1b49fSNick Terrell }
4668e0c1b49fSNick Terrell
/*! ZSTD_initCDict_internal() :
 *  Fills an already allocated @cdict :
 *  - records cParams and the dedicated-dict-search flag into the match state
 *  - references @dictBuffer, or copies it into the cdict workspace
 *    (copy only when not ZSTD_dlm_byRef and the buffer is non-NULL and non-empty)
 *  - reserves the entropy workspace, resets block state and match state
 *  - loads the dictionary through ZSTD_compress_insertDictionary() (ZSTD_dtlm_full)
 *  Note : workspace reservations are kept in this exact order.
 *  @params is received by value ; its compressionLevel and contentSizeFlag
 *  are overwritten locally before the dictionary is loaded.
 * @return : 0, or an error code */
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    ZSTD_cwksp* const ws = &cdict->workspace;
    ZSTD_matchState_t* const ms = &cdict->matchState;
    int const copyDict = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dictBuffer != NULL)
                      && (dictSize != 0);

    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    ms->cParams = params.cParams;
    ms->dedicatedDictSearch = params.enableDedicatedDictSearch;

    if (copyDict) {
        size_t const alignedDictSize = ZSTD_cwksp_align(dictSize, sizeof(void*));
        void* const dictCopy = ZSTD_cwksp_reserve_object(ws, alignedDictSize);
        RETURN_ERROR_IF(!dictCopy, memory_allocation, "NULL pointer!");
        cdict->dictContent = dictCopy;
        ZSTD_memcpy(dictCopy, dictBuffer, dictSize);
    } else {
        /* by reference (or nothing to copy) : keep pointing at the caller's buffer */
        cdict->dictContent = dictBuffer;
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    {   size_t const resetStatus = ZSTD_reset_matchState(
                ms,
                ws,
                &params.cParams,
                params.useRowMatchFinder,
                ZSTDcrp_makeClean,
                ZSTDirp_reset,
                ZSTD_resetTarget_CDict);
        FORWARD_IF_ERROR(resetStatus, "");
    }

    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
    params.fParams.contentSizeFlag = 1;
    {   size_t const dictID = ZSTD_compress_insertDictionary(
                &cdict->cBlockState, ms, NULL, ws,
                &params, cdict->dictContent, cdict->dictContentSize,
                dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= (size_t)(U32)-1);
        cdict->dictID = (U32)dictID;
    }

    return 0;
}
4721e0c1b49fSNick Terrell
ZSTD_createCDict_advanced_internal(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_compressionParameters cParams,ZSTD_paramSwitch_e useRowMatchFinder,U32 enableDedicatedDictSearch,ZSTD_customMem customMem)4722e0c1b49fSNick Terrell static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
4723e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod,
4724*2aa14b1aSNick Terrell ZSTD_compressionParameters cParams,
4725*2aa14b1aSNick Terrell ZSTD_paramSwitch_e useRowMatchFinder,
4726*2aa14b1aSNick Terrell U32 enableDedicatedDictSearch,
4727*2aa14b1aSNick Terrell ZSTD_customMem customMem)
4728e0c1b49fSNick Terrell {
4729e0c1b49fSNick Terrell if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
4730e0c1b49fSNick Terrell
4731e0c1b49fSNick Terrell { size_t const workspaceSize =
4732e0c1b49fSNick Terrell ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
4733e0c1b49fSNick Terrell ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
4734*2aa14b1aSNick Terrell ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
4735e0c1b49fSNick Terrell (dictLoadMethod == ZSTD_dlm_byRef ? 0
4736e0c1b49fSNick Terrell : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
4737e0c1b49fSNick Terrell void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
4738e0c1b49fSNick Terrell ZSTD_cwksp ws;
4739e0c1b49fSNick Terrell ZSTD_CDict* cdict;
4740e0c1b49fSNick Terrell
4741e0c1b49fSNick Terrell if (!workspace) {
4742e0c1b49fSNick Terrell ZSTD_customFree(workspace, customMem);
4743e0c1b49fSNick Terrell return NULL;
4744e0c1b49fSNick Terrell }
4745e0c1b49fSNick Terrell
4746e0c1b49fSNick Terrell ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
4747e0c1b49fSNick Terrell
4748e0c1b49fSNick Terrell cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
4749e0c1b49fSNick Terrell assert(cdict != NULL);
4750e0c1b49fSNick Terrell ZSTD_cwksp_move(&cdict->workspace, &ws);
4751e0c1b49fSNick Terrell cdict->customMem = customMem;
4752e0c1b49fSNick Terrell cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
4753*2aa14b1aSNick Terrell cdict->useRowMatchFinder = useRowMatchFinder;
4754e0c1b49fSNick Terrell return cdict;
4755e0c1b49fSNick Terrell }
4756e0c1b49fSNick Terrell }
4757e0c1b49fSNick Terrell
ZSTD_createCDict_advanced(const void * dictBuffer,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_compressionParameters cParams,ZSTD_customMem customMem)4758e0c1b49fSNick Terrell ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
4759e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod,
4760e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
4761e0c1b49fSNick Terrell ZSTD_compressionParameters cParams,
4762e0c1b49fSNick Terrell ZSTD_customMem customMem)
4763e0c1b49fSNick Terrell {
4764e0c1b49fSNick Terrell ZSTD_CCtx_params cctxParams;
4765e0c1b49fSNick Terrell ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
4766e0c1b49fSNick Terrell ZSTD_CCtxParams_init(&cctxParams, 0);
4767e0c1b49fSNick Terrell cctxParams.cParams = cParams;
4768e0c1b49fSNick Terrell cctxParams.customMem = customMem;
4769e0c1b49fSNick Terrell return ZSTD_createCDict_advanced2(
4770e0c1b49fSNick Terrell dictBuffer, dictSize,
4771e0c1b49fSNick Terrell dictLoadMethod, dictContentType,
4772e0c1b49fSNick Terrell &cctxParams, customMem);
4773e0c1b49fSNick Terrell }
4774e0c1b49fSNick Terrell
ZSTD_createCDict_advanced2(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,const ZSTD_CCtx_params * originalCctxParams,ZSTD_customMem customMem)4775*2aa14b1aSNick Terrell ZSTD_CDict* ZSTD_createCDict_advanced2(
4776e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4777e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod,
4778e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
4779e0c1b49fSNick Terrell const ZSTD_CCtx_params* originalCctxParams,
4780e0c1b49fSNick Terrell ZSTD_customMem customMem)
4781e0c1b49fSNick Terrell {
4782e0c1b49fSNick Terrell ZSTD_CCtx_params cctxParams = *originalCctxParams;
4783e0c1b49fSNick Terrell ZSTD_compressionParameters cParams;
4784e0c1b49fSNick Terrell ZSTD_CDict* cdict;
4785e0c1b49fSNick Terrell
4786e0c1b49fSNick Terrell DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
4787e0c1b49fSNick Terrell if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
4788e0c1b49fSNick Terrell
4789e0c1b49fSNick Terrell if (cctxParams.enableDedicatedDictSearch) {
4790e0c1b49fSNick Terrell cParams = ZSTD_dedicatedDictSearch_getCParams(
4791e0c1b49fSNick Terrell cctxParams.compressionLevel, dictSize);
4792e0c1b49fSNick Terrell ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
4793e0c1b49fSNick Terrell } else {
4794e0c1b49fSNick Terrell cParams = ZSTD_getCParamsFromCCtxParams(
4795e0c1b49fSNick Terrell &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
4796e0c1b49fSNick Terrell }
4797e0c1b49fSNick Terrell
4798e0c1b49fSNick Terrell if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
4799e0c1b49fSNick Terrell /* Fall back to non-DDSS params */
4800e0c1b49fSNick Terrell cctxParams.enableDedicatedDictSearch = 0;
4801e0c1b49fSNick Terrell cParams = ZSTD_getCParamsFromCCtxParams(
4802e0c1b49fSNick Terrell &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
4803e0c1b49fSNick Terrell }
4804e0c1b49fSNick Terrell
4805*2aa14b1aSNick Terrell DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
4806e0c1b49fSNick Terrell cctxParams.cParams = cParams;
4807*2aa14b1aSNick Terrell cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
4808e0c1b49fSNick Terrell
4809e0c1b49fSNick Terrell cdict = ZSTD_createCDict_advanced_internal(dictSize,
4810e0c1b49fSNick Terrell dictLoadMethod, cctxParams.cParams,
4811*2aa14b1aSNick Terrell cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
4812e0c1b49fSNick Terrell customMem);
4813e0c1b49fSNick Terrell
4814e0c1b49fSNick Terrell if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
4815e0c1b49fSNick Terrell dict, dictSize,
4816e0c1b49fSNick Terrell dictLoadMethod, dictContentType,
4817e0c1b49fSNick Terrell cctxParams) )) {
4818e0c1b49fSNick Terrell ZSTD_freeCDict(cdict);
4819e0c1b49fSNick Terrell return NULL;
4820e0c1b49fSNick Terrell }
4821e0c1b49fSNick Terrell
4822e0c1b49fSNick Terrell return cdict;
4823e0c1b49fSNick Terrell }
4824e0c1b49fSNick Terrell
ZSTD_createCDict(const void * dict,size_t dictSize,int compressionLevel)4825e0c1b49fSNick Terrell ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
4826e0c1b49fSNick Terrell {
4827e0c1b49fSNick Terrell ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
4828e0c1b49fSNick Terrell ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
4829e0c1b49fSNick Terrell ZSTD_dlm_byCopy, ZSTD_dct_auto,
4830e0c1b49fSNick Terrell cParams, ZSTD_defaultCMem);
4831e0c1b49fSNick Terrell if (cdict)
4832e0c1b49fSNick Terrell cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
4833e0c1b49fSNick Terrell return cdict;
4834e0c1b49fSNick Terrell }
4835e0c1b49fSNick Terrell
ZSTD_createCDict_byReference(const void * dict,size_t dictSize,int compressionLevel)4836e0c1b49fSNick Terrell ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
4837e0c1b49fSNick Terrell {
4838e0c1b49fSNick Terrell ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
4839e0c1b49fSNick Terrell ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
4840e0c1b49fSNick Terrell ZSTD_dlm_byRef, ZSTD_dct_auto,
4841e0c1b49fSNick Terrell cParams, ZSTD_defaultCMem);
4842e0c1b49fSNick Terrell if (cdict)
4843e0c1b49fSNick Terrell cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
4844e0c1b49fSNick Terrell return cdict;
4845e0c1b49fSNick Terrell }
4846e0c1b49fSNick Terrell
/*! ZSTD_freeCDict() :
 *  Releases the cdict's workspace, then the ZSTD_CDict structure itself
 *  when the workspace does not own it. NULL is accepted.
 * @return : 0 */
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict != NULL) {
        /* both values must be read before the workspace is released,
         * since the cdict structure may be stored inside it */
        ZSTD_customMem const allocator = cdict->customMem;
        int const ownedByWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);

        ZSTD_cwksp_free(&cdict->workspace, allocator);
        if (!ownedByWorkspace) {
            ZSTD_customFree(cdict, allocator);
        }
    }
    return 0;   /* support free on NULL */
}
4859e0c1b49fSNick Terrell
/*! ZSTD_initStaticCDict() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must be 8-byte aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevant cParams.
 * @return : pointer to ZSTD_CDict, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
ZSTD_initStaticCDict(void * workspace,size_t workspaceSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_compressionParameters cParams)4873e0c1b49fSNick Terrell const ZSTD_CDict* ZSTD_initStaticCDict(
4874e0c1b49fSNick Terrell void* workspace, size_t workspaceSize,
4875e0c1b49fSNick Terrell const void* dict, size_t dictSize,
4876e0c1b49fSNick Terrell ZSTD_dictLoadMethod_e dictLoadMethod,
4877e0c1b49fSNick Terrell ZSTD_dictContentType_e dictContentType,
4878e0c1b49fSNick Terrell ZSTD_compressionParameters cParams)
4879e0c1b49fSNick Terrell {
4880*2aa14b1aSNick Terrell ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
4881*2aa14b1aSNick Terrell /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
4882*2aa14b1aSNick Terrell size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
4883e0c1b49fSNick Terrell size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
4884e0c1b49fSNick Terrell + (dictLoadMethod == ZSTD_dlm_byRef ? 0
4885e0c1b49fSNick Terrell : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
4886e0c1b49fSNick Terrell + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
4887e0c1b49fSNick Terrell + matchStateSize;
4888e0c1b49fSNick Terrell ZSTD_CDict* cdict;
4889e0c1b49fSNick Terrell ZSTD_CCtx_params params;
4890e0c1b49fSNick Terrell
4891e0c1b49fSNick Terrell if ((size_t)workspace & 7) return NULL; /* 8-aligned */
4892e0c1b49fSNick Terrell
4893e0c1b49fSNick Terrell {
4894e0c1b49fSNick Terrell ZSTD_cwksp ws;
4895e0c1b49fSNick Terrell ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
4896e0c1b49fSNick Terrell cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
4897e0c1b49fSNick Terrell if (cdict == NULL) return NULL;
4898e0c1b49fSNick Terrell ZSTD_cwksp_move(&cdict->workspace, &ws);
4899e0c1b49fSNick Terrell }
4900e0c1b49fSNick Terrell
4901e0c1b49fSNick Terrell DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
4902e0c1b49fSNick Terrell (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
4903e0c1b49fSNick Terrell if (workspaceSize < neededSize) return NULL;
4904e0c1b49fSNick Terrell
4905e0c1b49fSNick Terrell ZSTD_CCtxParams_init(¶ms, 0);
4906e0c1b49fSNick Terrell params.cParams = cParams;
4907*2aa14b1aSNick Terrell params.useRowMatchFinder = useRowMatchFinder;
4908*2aa14b1aSNick Terrell cdict->useRowMatchFinder = useRowMatchFinder;
4909e0c1b49fSNick Terrell
4910e0c1b49fSNick Terrell if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
4911e0c1b49fSNick Terrell dict, dictSize,
4912e0c1b49fSNick Terrell dictLoadMethod, dictContentType,
4913e0c1b49fSNick Terrell params) ))
4914e0c1b49fSNick Terrell return NULL;
4915e0c1b49fSNick Terrell
4916e0c1b49fSNick Terrell return cdict;
4917e0c1b49fSNick Terrell }
4918e0c1b49fSNick Terrell
ZSTD_getCParamsFromCDict(const ZSTD_CDict * cdict)4919e0c1b49fSNick Terrell ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
4920e0c1b49fSNick Terrell {
4921e0c1b49fSNick Terrell assert(cdict != NULL);
4922e0c1b49fSNick Terrell return cdict->matchState.cParams;
4923e0c1b49fSNick Terrell }
4924e0c1b49fSNick Terrell
4925e0c1b49fSNick Terrell /*! ZSTD_getDictID_fromCDict() :
4926e0c1b49fSNick Terrell * Provides the dictID of the dictionary loaded into `cdict`.
4927e0c1b49fSNick Terrell * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
4928e0c1b49fSNick Terrell * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromCDict(const ZSTD_CDict * cdict)4929e0c1b49fSNick Terrell unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
4930e0c1b49fSNick Terrell {
4931e0c1b49fSNick Terrell if (cdict==NULL) return 0;
4932e0c1b49fSNick Terrell return cdict->dictID;
4933e0c1b49fSNick Terrell }
4934e0c1b49fSNick Terrell
4935*2aa14b1aSNick Terrell /* ZSTD_compressBegin_usingCDict_internal() :
4936*2aa14b1aSNick Terrell * Implementation of various ZSTD_compressBegin_usingCDict* functions.
4937*2aa14b1aSNick Terrell */
ZSTD_compressBegin_usingCDict_internal(ZSTD_CCtx * const cctx,const ZSTD_CDict * const cdict,ZSTD_frameParameters const fParams,unsigned long long const pledgedSrcSize)4938*2aa14b1aSNick Terrell static size_t ZSTD_compressBegin_usingCDict_internal(
4939e0c1b49fSNick Terrell ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
4940e0c1b49fSNick Terrell ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
4941e0c1b49fSNick Terrell {
4942e0c1b49fSNick Terrell ZSTD_CCtx_params cctxParams;
4943*2aa14b1aSNick Terrell DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
4944e0c1b49fSNick Terrell RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
4945e0c1b49fSNick Terrell /* Initialize the cctxParams from the cdict */
4946e0c1b49fSNick Terrell {
4947e0c1b49fSNick Terrell ZSTD_parameters params;
4948e0c1b49fSNick Terrell params.fParams = fParams;
4949e0c1b49fSNick Terrell params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
4950e0c1b49fSNick Terrell || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
4951e0c1b49fSNick Terrell || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
4952e0c1b49fSNick Terrell || cdict->compressionLevel == 0 ) ?
4953e0c1b49fSNick Terrell ZSTD_getCParamsFromCDict(cdict)
4954e0c1b49fSNick Terrell : ZSTD_getCParams(cdict->compressionLevel,
4955e0c1b49fSNick Terrell pledgedSrcSize,
4956e0c1b49fSNick Terrell cdict->dictContentSize);
4957e0c1b49fSNick Terrell ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel);
4958e0c1b49fSNick Terrell }
4959e0c1b49fSNick Terrell /* Increase window log to fit the entire dictionary and source if the
4960e0c1b49fSNick Terrell * source size is known. Limit the increase to 19, which is the
4961e0c1b49fSNick Terrell * window log for compression level 1 with the largest source size.
4962e0c1b49fSNick Terrell */
4963e0c1b49fSNick Terrell if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
4964e0c1b49fSNick Terrell U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
4965e0c1b49fSNick Terrell U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
4966e0c1b49fSNick Terrell cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
4967e0c1b49fSNick Terrell }
4968e0c1b49fSNick Terrell return ZSTD_compressBegin_internal(cctx,
4969e0c1b49fSNick Terrell NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
4970e0c1b49fSNick Terrell cdict,
4971e0c1b49fSNick Terrell &cctxParams, pledgedSrcSize,
4972e0c1b49fSNick Terrell ZSTDb_not_buffered);
4973e0c1b49fSNick Terrell }
4974e0c1b49fSNick Terrell
4975*2aa14b1aSNick Terrell
4976*2aa14b1aSNick Terrell /* ZSTD_compressBegin_usingCDict_advanced() :
4977*2aa14b1aSNick Terrell * This function is DEPRECATED.
4978*2aa14b1aSNick Terrell * cdict must be != NULL */
ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx * const cctx,const ZSTD_CDict * const cdict,ZSTD_frameParameters const fParams,unsigned long long const pledgedSrcSize)4979*2aa14b1aSNick Terrell size_t ZSTD_compressBegin_usingCDict_advanced(
4980*2aa14b1aSNick Terrell ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
4981*2aa14b1aSNick Terrell ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
4982*2aa14b1aSNick Terrell {
4983*2aa14b1aSNick Terrell return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
4984*2aa14b1aSNick Terrell }
4985*2aa14b1aSNick Terrell
4986e0c1b49fSNick Terrell /* ZSTD_compressBegin_usingCDict() :
4987*2aa14b1aSNick Terrell * cdict must be != NULL */
ZSTD_compressBegin_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict)4988e0c1b49fSNick Terrell size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
4989e0c1b49fSNick Terrell {
4990e0c1b49fSNick Terrell ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
4991*2aa14b1aSNick Terrell return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
4992e0c1b49fSNick Terrell }
4993e0c1b49fSNick Terrell
4994*2aa14b1aSNick Terrell /*! ZSTD_compress_usingCDict_internal():
4995*2aa14b1aSNick Terrell * Implementation of various ZSTD_compress_usingCDict* functions.
4996*2aa14b1aSNick Terrell */
ZSTD_compress_usingCDict_internal(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const ZSTD_CDict * cdict,ZSTD_frameParameters fParams)4997*2aa14b1aSNick Terrell static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
4998*2aa14b1aSNick Terrell void* dst, size_t dstCapacity,
4999*2aa14b1aSNick Terrell const void* src, size_t srcSize,
5000*2aa14b1aSNick Terrell const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
5001*2aa14b1aSNick Terrell {
5002*2aa14b1aSNick Terrell FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
5003*2aa14b1aSNick Terrell return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
5004*2aa14b1aSNick Terrell }
5005*2aa14b1aSNick Terrell
5006*2aa14b1aSNick Terrell /*! ZSTD_compress_usingCDict_advanced():
5007*2aa14b1aSNick Terrell * This function is DEPRECATED.
5008*2aa14b1aSNick Terrell */
ZSTD_compress_usingCDict_advanced(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const ZSTD_CDict * cdict,ZSTD_frameParameters fParams)5009e0c1b49fSNick Terrell size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
5010e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
5011e0c1b49fSNick Terrell const void* src, size_t srcSize,
5012e0c1b49fSNick Terrell const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
5013e0c1b49fSNick Terrell {
5014*2aa14b1aSNick Terrell return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
5015e0c1b49fSNick Terrell }
5016e0c1b49fSNick Terrell
5017e0c1b49fSNick Terrell /*! ZSTD_compress_usingCDict() :
5018e0c1b49fSNick Terrell * Compression using a digested Dictionary.
5019e0c1b49fSNick Terrell * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
5020e0c1b49fSNick Terrell * Note that compression parameters are decided at CDict creation time
5021e0c1b49fSNick Terrell * while frame parameters are hardcoded */
ZSTD_compress_usingCDict(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const void * src,size_t srcSize,const ZSTD_CDict * cdict)5022e0c1b49fSNick Terrell size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
5023e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
5024e0c1b49fSNick Terrell const void* src, size_t srcSize,
5025e0c1b49fSNick Terrell const ZSTD_CDict* cdict)
5026e0c1b49fSNick Terrell {
5027e0c1b49fSNick Terrell ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
5028*2aa14b1aSNick Terrell return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
5029e0c1b49fSNick Terrell }
5030e0c1b49fSNick Terrell
5031e0c1b49fSNick Terrell
5032e0c1b49fSNick Terrell
5033e0c1b49fSNick Terrell /* ******************************************************************
5034e0c1b49fSNick Terrell * Streaming
5035e0c1b49fSNick Terrell ********************************************************************/
5036e0c1b49fSNick Terrell
ZSTD_createCStream(void)5037e0c1b49fSNick Terrell ZSTD_CStream* ZSTD_createCStream(void)
5038e0c1b49fSNick Terrell {
5039e0c1b49fSNick Terrell DEBUGLOG(3, "ZSTD_createCStream");
5040e0c1b49fSNick Terrell return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
5041e0c1b49fSNick Terrell }
5042e0c1b49fSNick Terrell
ZSTD_initStaticCStream(void * workspace,size_t workspaceSize)5043e0c1b49fSNick Terrell ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
5044e0c1b49fSNick Terrell {
5045e0c1b49fSNick Terrell return ZSTD_initStaticCCtx(workspace, workspaceSize);
5046e0c1b49fSNick Terrell }
5047e0c1b49fSNick Terrell
ZSTD_createCStream_advanced(ZSTD_customMem customMem)5048e0c1b49fSNick Terrell ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
5049e0c1b49fSNick Terrell { /* CStream and CCtx are now same object */
5050e0c1b49fSNick Terrell return ZSTD_createCCtx_advanced(customMem);
5051e0c1b49fSNick Terrell }
5052e0c1b49fSNick Terrell
/* ZSTD_freeCStream() :
 * CStream and CCtx are the same object, so release goes through ZSTD_freeCCtx().
 * @return : whatever ZSTD_freeCCtx() returns. */
size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    size_t const freeResult = ZSTD_freeCCtx(zcs);
    return freeResult;
}
5057e0c1b49fSNick Terrell
5058e0c1b49fSNick Terrell
5059e0c1b49fSNick Terrell
5060e0c1b49fSNick Terrell /*====== Initialization ======*/
5061e0c1b49fSNick Terrell
ZSTD_CStreamInSize(void)5062e0c1b49fSNick Terrell size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
5063e0c1b49fSNick Terrell
ZSTD_CStreamOutSize(void)5064e0c1b49fSNick Terrell size_t ZSTD_CStreamOutSize(void)
5065e0c1b49fSNick Terrell {
5066e0c1b49fSNick Terrell return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
5067e0c1b49fSNick Terrell }
5068e0c1b49fSNick Terrell
ZSTD_getCParamMode(ZSTD_CDict const * cdict,ZSTD_CCtx_params const * params,U64 pledgedSrcSize)5069e0c1b49fSNick Terrell static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
5070e0c1b49fSNick Terrell {
5071e0c1b49fSNick Terrell if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
5072e0c1b49fSNick Terrell return ZSTD_cpm_attachDict;
5073e0c1b49fSNick Terrell else
5074e0c1b49fSNick Terrell return ZSTD_cpm_noAttachDict;
5075e0c1b49fSNick Terrell }
5076e0c1b49fSNick Terrell
5077e0c1b49fSNick Terrell /* ZSTD_resetCStream():
5078e0c1b49fSNick Terrell * pledgedSrcSize == 0 means "unknown" */
ZSTD_resetCStream(ZSTD_CStream * zcs,unsigned long long pss)5079e0c1b49fSNick Terrell size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
5080e0c1b49fSNick Terrell {
5081e0c1b49fSNick Terrell /* temporary : 0 interpreted as "unknown" during transition period.
5082e0c1b49fSNick Terrell * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
5083e0c1b49fSNick Terrell * 0 will be interpreted as "empty" in the future.
5084e0c1b49fSNick Terrell */
5085e0c1b49fSNick Terrell U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
5086e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
5087e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
5088e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
5089e0c1b49fSNick Terrell return 0;
5090e0c1b49fSNick Terrell }
5091e0c1b49fSNick Terrell
5092e0c1b49fSNick Terrell /*! ZSTD_initCStream_internal() :
5093e0c1b49fSNick Terrell * Note : for lib/compress only. Used by zstdmt_compress.c.
5094e0c1b49fSNick Terrell * Assumption 1 : params are valid
5095e0c1b49fSNick Terrell * Assumption 2 : either dict, or cdict, is defined, not both */
ZSTD_initCStream_internal(ZSTD_CStream * zcs,const void * dict,size_t dictSize,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,unsigned long long pledgedSrcSize)5096e0c1b49fSNick Terrell size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
5097e0c1b49fSNick Terrell const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
5098e0c1b49fSNick Terrell const ZSTD_CCtx_params* params,
5099e0c1b49fSNick Terrell unsigned long long pledgedSrcSize)
5100e0c1b49fSNick Terrell {
5101e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_initCStream_internal");
5102e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
5103e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
5104e0c1b49fSNick Terrell assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
5105e0c1b49fSNick Terrell zcs->requestedParams = *params;
5106e0c1b49fSNick Terrell assert(!((dict) && (cdict))); /* either dict or cdict, not both */
5107e0c1b49fSNick Terrell if (dict) {
5108e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
5109e0c1b49fSNick Terrell } else {
5110e0c1b49fSNick Terrell /* Dictionary is cleared if !cdict */
5111e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
5112e0c1b49fSNick Terrell }
5113e0c1b49fSNick Terrell return 0;
5114e0c1b49fSNick Terrell }
5115e0c1b49fSNick Terrell
5116e0c1b49fSNick Terrell /* ZSTD_initCStream_usingCDict_advanced() :
5117e0c1b49fSNick Terrell * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream * zcs,const ZSTD_CDict * cdict,ZSTD_frameParameters fParams,unsigned long long pledgedSrcSize)5118e0c1b49fSNick Terrell size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
5119e0c1b49fSNick Terrell const ZSTD_CDict* cdict,
5120e0c1b49fSNick Terrell ZSTD_frameParameters fParams,
5121e0c1b49fSNick Terrell unsigned long long pledgedSrcSize)
5122e0c1b49fSNick Terrell {
5123e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
5124e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
5125e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
5126e0c1b49fSNick Terrell zcs->requestedParams.fParams = fParams;
5127e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
5128e0c1b49fSNick Terrell return 0;
5129e0c1b49fSNick Terrell }
5130e0c1b49fSNick Terrell
5131e0c1b49fSNick Terrell /* note : cdict must outlive compression session */
ZSTD_initCStream_usingCDict(ZSTD_CStream * zcs,const ZSTD_CDict * cdict)5132e0c1b49fSNick Terrell size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
5133e0c1b49fSNick Terrell {
5134e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
5135e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
5136e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
5137e0c1b49fSNick Terrell return 0;
5138e0c1b49fSNick Terrell }
5139e0c1b49fSNick Terrell
5140e0c1b49fSNick Terrell
5141e0c1b49fSNick Terrell /* ZSTD_initCStream_advanced() :
5142e0c1b49fSNick Terrell * pledgedSrcSize must be exact.
5143e0c1b49fSNick Terrell * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
5144e0c1b49fSNick Terrell * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
ZSTD_initCStream_advanced(ZSTD_CStream * zcs,const void * dict,size_t dictSize,ZSTD_parameters params,unsigned long long pss)5145e0c1b49fSNick Terrell size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
5146e0c1b49fSNick Terrell const void* dict, size_t dictSize,
5147e0c1b49fSNick Terrell ZSTD_parameters params, unsigned long long pss)
5148e0c1b49fSNick Terrell {
5149e0c1b49fSNick Terrell /* for compatibility with older programs relying on this behavior.
5150e0c1b49fSNick Terrell * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
5151e0c1b49fSNick Terrell * This line will be removed in the future.
5152e0c1b49fSNick Terrell */
5153e0c1b49fSNick Terrell U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
5154e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_initCStream_advanced");
5155e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
5156e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
5157e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
5158e0c1b49fSNick Terrell ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms);
5159e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
5160e0c1b49fSNick Terrell return 0;
5161e0c1b49fSNick Terrell }
5162e0c1b49fSNick Terrell
/* ZSTD_initCStream_usingDict() :
 * Reset the session of `zcs`, set its compression level, then load `dict`.
 * @return : 0, or the first error forwarded from those three steps. */
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}
5171e0c1b49fSNick Terrell
/* ZSTD_initCStream_srcSize() :
 * Reset the session of `zcs`, drop any referenced CDict, set the compression
 * level and record the pledged source size.
 * @return : 0, or the first error forwarded from those steps. */
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss != 0) ? pss : ZSTD_CONTENTSIZE_UNKNOWN;

    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}
5186e0c1b49fSNick Terrell
/* ZSTD_initCStream() :
 * Reset the session of `zcs`, drop any referenced CDict and set the
 * compression level.
 * @return : 0, or the first error forwarded from those steps. */
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    return 0;
}
5195e0c1b49fSNick Terrell
5196e0c1b49fSNick Terrell /*====== Compression ======*/
5197e0c1b49fSNick Terrell
ZSTD_nextInputSizeHint(const ZSTD_CCtx * cctx)5198e0c1b49fSNick Terrell static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
5199e0c1b49fSNick Terrell {
5200e0c1b49fSNick Terrell size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
5201e0c1b49fSNick Terrell if (hintInSize==0) hintInSize = cctx->blockSize;
5202e0c1b49fSNick Terrell return hintInSize;
5203e0c1b49fSNick Terrell }
5204e0c1b49fSNick Terrell
5205e0c1b49fSNick Terrell /* ZSTD_compressStream_generic():
5206e0c1b49fSNick Terrell * internal function for all *compressStream*() variants
5207e0c1b49fSNick Terrell * non-static, because can be called from zstdmt_compress.c
5208e0c1b49fSNick Terrell * @return : hint size for next input */
ZSTD_compressStream_generic(ZSTD_CStream * zcs,ZSTD_outBuffer * output,ZSTD_inBuffer * input,ZSTD_EndDirective const flushMode)5209e0c1b49fSNick Terrell static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
5210e0c1b49fSNick Terrell ZSTD_outBuffer* output,
5211e0c1b49fSNick Terrell ZSTD_inBuffer* input,
5212e0c1b49fSNick Terrell ZSTD_EndDirective const flushMode)
5213e0c1b49fSNick Terrell {
5214e0c1b49fSNick Terrell const char* const istart = (const char*)input->src;
5215e0c1b49fSNick Terrell const char* const iend = input->size != 0 ? istart + input->size : istart;
5216e0c1b49fSNick Terrell const char* ip = input->pos != 0 ? istart + input->pos : istart;
5217e0c1b49fSNick Terrell char* const ostart = (char*)output->dst;
5218e0c1b49fSNick Terrell char* const oend = output->size != 0 ? ostart + output->size : ostart;
5219e0c1b49fSNick Terrell char* op = output->pos != 0 ? ostart + output->pos : ostart;
5220e0c1b49fSNick Terrell U32 someMoreWork = 1;
5221e0c1b49fSNick Terrell
5222e0c1b49fSNick Terrell /* check expectations */
5223e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
5224e0c1b49fSNick Terrell if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
5225e0c1b49fSNick Terrell assert(zcs->inBuff != NULL);
5226e0c1b49fSNick Terrell assert(zcs->inBuffSize > 0);
5227e0c1b49fSNick Terrell }
5228e0c1b49fSNick Terrell if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
5229e0c1b49fSNick Terrell assert(zcs->outBuff != NULL);
5230e0c1b49fSNick Terrell assert(zcs->outBuffSize > 0);
5231e0c1b49fSNick Terrell }
5232e0c1b49fSNick Terrell assert(output->pos <= output->size);
5233e0c1b49fSNick Terrell assert(input->pos <= input->size);
5234e0c1b49fSNick Terrell assert((U32)flushMode <= (U32)ZSTD_e_end);
5235e0c1b49fSNick Terrell
5236e0c1b49fSNick Terrell while (someMoreWork) {
5237e0c1b49fSNick Terrell switch(zcs->streamStage)
5238e0c1b49fSNick Terrell {
5239e0c1b49fSNick Terrell case zcss_init:
5240e0c1b49fSNick Terrell RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
5241e0c1b49fSNick Terrell
5242e0c1b49fSNick Terrell case zcss_load:
5243e0c1b49fSNick Terrell if ( (flushMode == ZSTD_e_end)
5244e0c1b49fSNick Terrell && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */
5245e0c1b49fSNick Terrell || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
5246e0c1b49fSNick Terrell && (zcs->inBuffPos == 0) ) {
5247e0c1b49fSNick Terrell /* shortcut to compression pass directly into output buffer */
5248e0c1b49fSNick Terrell size_t const cSize = ZSTD_compressEnd(zcs,
5249e0c1b49fSNick Terrell op, oend-op, ip, iend-ip);
5250e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
5251e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
5252e0c1b49fSNick Terrell ip = iend;
5253e0c1b49fSNick Terrell op += cSize;
5254e0c1b49fSNick Terrell zcs->frameEnded = 1;
5255e0c1b49fSNick Terrell ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
5256e0c1b49fSNick Terrell someMoreWork = 0; break;
5257e0c1b49fSNick Terrell }
5258e0c1b49fSNick Terrell /* complete loading into inBuffer in buffered mode */
5259e0c1b49fSNick Terrell if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
5260e0c1b49fSNick Terrell size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
5261e0c1b49fSNick Terrell size_t const loaded = ZSTD_limitCopy(
5262e0c1b49fSNick Terrell zcs->inBuff + zcs->inBuffPos, toLoad,
5263e0c1b49fSNick Terrell ip, iend-ip);
5264e0c1b49fSNick Terrell zcs->inBuffPos += loaded;
5265e0c1b49fSNick Terrell if (loaded != 0)
5266e0c1b49fSNick Terrell ip += loaded;
5267e0c1b49fSNick Terrell if ( (flushMode == ZSTD_e_continue)
5268e0c1b49fSNick Terrell && (zcs->inBuffPos < zcs->inBuffTarget) ) {
5269e0c1b49fSNick Terrell /* not enough input to fill full block : stop here */
5270e0c1b49fSNick Terrell someMoreWork = 0; break;
5271e0c1b49fSNick Terrell }
5272e0c1b49fSNick Terrell if ( (flushMode == ZSTD_e_flush)
5273e0c1b49fSNick Terrell && (zcs->inBuffPos == zcs->inToCompress) ) {
5274e0c1b49fSNick Terrell /* empty */
5275e0c1b49fSNick Terrell someMoreWork = 0; break;
5276e0c1b49fSNick Terrell }
5277e0c1b49fSNick Terrell }
5278e0c1b49fSNick Terrell /* compress current block (note : this stage cannot be stopped in the middle) */
5279e0c1b49fSNick Terrell DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
5280e0c1b49fSNick Terrell { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
5281e0c1b49fSNick Terrell void* cDst;
5282e0c1b49fSNick Terrell size_t cSize;
5283e0c1b49fSNick Terrell size_t oSize = oend-op;
5284e0c1b49fSNick Terrell size_t const iSize = inputBuffered
5285e0c1b49fSNick Terrell ? zcs->inBuffPos - zcs->inToCompress
5286e0c1b49fSNick Terrell : MIN((size_t)(iend - ip), zcs->blockSize);
5287e0c1b49fSNick Terrell if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
5288e0c1b49fSNick Terrell cDst = op; /* compress into output buffer, to skip flush stage */
5289e0c1b49fSNick Terrell else
5290e0c1b49fSNick Terrell cDst = zcs->outBuff, oSize = zcs->outBuffSize;
5291e0c1b49fSNick Terrell if (inputBuffered) {
5292e0c1b49fSNick Terrell unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
5293e0c1b49fSNick Terrell cSize = lastBlock ?
5294e0c1b49fSNick Terrell ZSTD_compressEnd(zcs, cDst, oSize,
5295e0c1b49fSNick Terrell zcs->inBuff + zcs->inToCompress, iSize) :
5296e0c1b49fSNick Terrell ZSTD_compressContinue(zcs, cDst, oSize,
5297e0c1b49fSNick Terrell zcs->inBuff + zcs->inToCompress, iSize);
5298e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
5299e0c1b49fSNick Terrell zcs->frameEnded = lastBlock;
5300e0c1b49fSNick Terrell /* prepare next block */
5301e0c1b49fSNick Terrell zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
5302e0c1b49fSNick Terrell if (zcs->inBuffTarget > zcs->inBuffSize)
5303e0c1b49fSNick Terrell zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
5304e0c1b49fSNick Terrell DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
5305e0c1b49fSNick Terrell (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
5306e0c1b49fSNick Terrell if (!lastBlock)
5307e0c1b49fSNick Terrell assert(zcs->inBuffTarget <= zcs->inBuffSize);
5308e0c1b49fSNick Terrell zcs->inToCompress = zcs->inBuffPos;
5309e0c1b49fSNick Terrell } else {
5310e0c1b49fSNick Terrell unsigned const lastBlock = (ip + iSize == iend);
5311e0c1b49fSNick Terrell assert(flushMode == ZSTD_e_end /* Already validated */);
5312e0c1b49fSNick Terrell cSize = lastBlock ?
5313e0c1b49fSNick Terrell ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
5314e0c1b49fSNick Terrell ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
5315e0c1b49fSNick Terrell /* Consume the input prior to error checking to mirror buffered mode. */
5316e0c1b49fSNick Terrell if (iSize > 0)
5317e0c1b49fSNick Terrell ip += iSize;
5318e0c1b49fSNick Terrell FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
5319e0c1b49fSNick Terrell zcs->frameEnded = lastBlock;
5320e0c1b49fSNick Terrell if (lastBlock)
5321e0c1b49fSNick Terrell assert(ip == iend);
5322e0c1b49fSNick Terrell }
5323e0c1b49fSNick Terrell if (cDst == op) { /* no need to flush */
5324e0c1b49fSNick Terrell op += cSize;
5325e0c1b49fSNick Terrell if (zcs->frameEnded) {
5326e0c1b49fSNick Terrell DEBUGLOG(5, "Frame completed directly in outBuffer");
5327e0c1b49fSNick Terrell someMoreWork = 0;
5328e0c1b49fSNick Terrell ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
5329e0c1b49fSNick Terrell }
5330e0c1b49fSNick Terrell break;
5331e0c1b49fSNick Terrell }
5332e0c1b49fSNick Terrell zcs->outBuffContentSize = cSize;
5333e0c1b49fSNick Terrell zcs->outBuffFlushedSize = 0;
5334e0c1b49fSNick Terrell zcs->streamStage = zcss_flush; /* pass-through to flush stage */
5335e0c1b49fSNick Terrell }
5336e0c1b49fSNick Terrell ZSTD_FALLTHROUGH;
5337e0c1b49fSNick Terrell case zcss_flush:
5338e0c1b49fSNick Terrell DEBUGLOG(5, "flush stage");
5339e0c1b49fSNick Terrell assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
5340e0c1b49fSNick Terrell { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
5341e0c1b49fSNick Terrell size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
5342e0c1b49fSNick Terrell zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
5343e0c1b49fSNick Terrell DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
5344e0c1b49fSNick Terrell (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
5345e0c1b49fSNick Terrell if (flushed)
5346e0c1b49fSNick Terrell op += flushed;
5347e0c1b49fSNick Terrell zcs->outBuffFlushedSize += flushed;
5348e0c1b49fSNick Terrell if (toFlush!=flushed) {
5349e0c1b49fSNick Terrell /* flush not fully completed, presumably because dst is too small */
5350e0c1b49fSNick Terrell assert(op==oend);
5351e0c1b49fSNick Terrell someMoreWork = 0;
5352e0c1b49fSNick Terrell break;
5353e0c1b49fSNick Terrell }
5354e0c1b49fSNick Terrell zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
5355e0c1b49fSNick Terrell if (zcs->frameEnded) {
5356e0c1b49fSNick Terrell DEBUGLOG(5, "Frame completed on flush");
5357e0c1b49fSNick Terrell someMoreWork = 0;
5358e0c1b49fSNick Terrell ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
5359e0c1b49fSNick Terrell break;
5360e0c1b49fSNick Terrell }
5361e0c1b49fSNick Terrell zcs->streamStage = zcss_load;
5362e0c1b49fSNick Terrell break;
5363e0c1b49fSNick Terrell }
5364e0c1b49fSNick Terrell
5365e0c1b49fSNick Terrell default: /* impossible */
5366e0c1b49fSNick Terrell assert(0);
5367e0c1b49fSNick Terrell }
5368e0c1b49fSNick Terrell }
5369e0c1b49fSNick Terrell
5370e0c1b49fSNick Terrell input->pos = ip - istart;
5371e0c1b49fSNick Terrell output->pos = op - ostart;
5372e0c1b49fSNick Terrell if (zcs->frameEnded) return 0;
5373e0c1b49fSNick Terrell return ZSTD_nextInputSizeHint(zcs);
5374e0c1b49fSNick Terrell }
5375e0c1b49fSNick Terrell
ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx * cctx)5376e0c1b49fSNick Terrell static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
5377e0c1b49fSNick Terrell {
5378e0c1b49fSNick Terrell return ZSTD_nextInputSizeHint(cctx);
5379e0c1b49fSNick Terrell
5380e0c1b49fSNick Terrell }
5381e0c1b49fSNick Terrell
ZSTD_compressStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output,ZSTD_inBuffer * input)5382e0c1b49fSNick Terrell size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
5383e0c1b49fSNick Terrell {
5384e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
5385e0c1b49fSNick Terrell return ZSTD_nextInputSizeHint_MTorST(zcs);
5386e0c1b49fSNick Terrell }
5387e0c1b49fSNick Terrell
5388e0c1b49fSNick Terrell /* After a compression call set the expected input/output buffer.
5389e0c1b49fSNick Terrell * This is validated at the start of the next compression call.
5390e0c1b49fSNick Terrell */
ZSTD_setBufferExpectations(ZSTD_CCtx * cctx,ZSTD_outBuffer const * output,ZSTD_inBuffer const * input)5391e0c1b49fSNick Terrell static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
5392e0c1b49fSNick Terrell {
5393e0c1b49fSNick Terrell if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
5394e0c1b49fSNick Terrell cctx->expectedInBuffer = *input;
5395e0c1b49fSNick Terrell }
5396e0c1b49fSNick Terrell if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
5397e0c1b49fSNick Terrell cctx->expectedOutBufferSize = output->size - output->pos;
5398e0c1b49fSNick Terrell }
5399e0c1b49fSNick Terrell }
5400e0c1b49fSNick Terrell
5401e0c1b49fSNick Terrell /* Validate that the input/output buffers match the expectations set by
5402e0c1b49fSNick Terrell * ZSTD_setBufferExpectations.
5403e0c1b49fSNick Terrell */
ZSTD_checkBufferStability(ZSTD_CCtx const * cctx,ZSTD_outBuffer const * output,ZSTD_inBuffer const * input,ZSTD_EndDirective endOp)5404e0c1b49fSNick Terrell static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
5405e0c1b49fSNick Terrell ZSTD_outBuffer const* output,
5406e0c1b49fSNick Terrell ZSTD_inBuffer const* input,
5407e0c1b49fSNick Terrell ZSTD_EndDirective endOp)
5408e0c1b49fSNick Terrell {
5409e0c1b49fSNick Terrell if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
5410e0c1b49fSNick Terrell ZSTD_inBuffer const expect = cctx->expectedInBuffer;
5411e0c1b49fSNick Terrell if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
5412e0c1b49fSNick Terrell RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
5413e0c1b49fSNick Terrell if (endOp != ZSTD_e_end)
5414e0c1b49fSNick Terrell RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
5415e0c1b49fSNick Terrell }
5416e0c1b49fSNick Terrell if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
5417e0c1b49fSNick Terrell size_t const outBufferSize = output->size - output->pos;
5418e0c1b49fSNick Terrell if (cctx->expectedOutBufferSize != outBufferSize)
5419e0c1b49fSNick Terrell RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
5420e0c1b49fSNick Terrell }
5421e0c1b49fSNick Terrell return 0;
5422e0c1b49fSNick Terrell }
5423e0c1b49fSNick Terrell
/* ZSTD_CCtx_init_compressStream2() :
 * Transparent initialization stage of ZSTD_compressStream2().
 * Works on a local copy of cctx->requestedParams, resolves the compression
 * parameters, starts a new frame through ZSTD_compressBegin_internal(), then
 * resets the streaming state of @cctx (positions, targets, stage, frameEnded).
 * @endOp  : when ZSTD_e_end, the pledged source size is set from @inSize.
 * @inSize : size of the input buffer presented by the caller.
 * @return : 0 on success, or the error code forwarded from
 *           ZSTD_initLocalDict() / ZSTD_compressBegin_internal().
 */
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
                                             ZSTD_EndDirective endOp,
                                             size_t inSize)
{
    ZSTD_CCtx_params params = cctx->requestedParams;
    /* keep a private copy of the prefix : the one stored in cctx is cleared just below */
    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
    if (cctx->cdict && !cctx->localDict.cdict) {
        /* Let the cdict's compression level take priority over the requested params.
         * But do not take the cdict's compression level if the "cdict" is actually a localDict
         * generated from ZSTD_initLocalDict().
         */
        params.compressionLevel = cctx->cdict->compressionLevel;
    }
    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
    {
        /* dictionary size used for parameter selection : prefix first, then cdict, else none */
        size_t const dictSize = prefixDict.dict
                ? prefixDict.dictSize
                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &params, cctx->pledgedSrcSizePlusOne-1,
                dictSize, mode);
    }

    /* resolve the remaining modes from the final cParams */
    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);

    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
                cctx->cdict,
                &params, pledgedSrcSize,
                ZSTDb_buffered) , "");
        assert(cctx->appliedParams.nbWorkers == 0);
        cctx->inToCompress = 0;
        cctx->inBuffPos = 0;
        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
            /* for small input: avoid automatic flush on reaching end of block, since
             * it would require to add a 3-bytes null block to end frame
             */
            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
        } else {
            cctx->inBuffTarget = 0;
        }
        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
        cctx->streamStage = zcss_load;
        cctx->frameEnded = 0;
    }
    return 0;
}
5479e0c1b49fSNick Terrell
ZSTD_compressStream2(ZSTD_CCtx * cctx,ZSTD_outBuffer * output,ZSTD_inBuffer * input,ZSTD_EndDirective endOp)5480e0c1b49fSNick Terrell size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
5481e0c1b49fSNick Terrell ZSTD_outBuffer* output,
5482e0c1b49fSNick Terrell ZSTD_inBuffer* input,
5483e0c1b49fSNick Terrell ZSTD_EndDirective endOp)
5484e0c1b49fSNick Terrell {
5485e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
5486e0c1b49fSNick Terrell /* check conditions */
5487e0c1b49fSNick Terrell RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
5488e0c1b49fSNick Terrell RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer");
5489e0c1b49fSNick Terrell RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
5490e0c1b49fSNick Terrell assert(cctx != NULL);
5491e0c1b49fSNick Terrell
5492e0c1b49fSNick Terrell /* transparent initialization stage */
5493e0c1b49fSNick Terrell if (cctx->streamStage == zcss_init) {
5494e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
5495e0c1b49fSNick Terrell ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */
5496e0c1b49fSNick Terrell }
5497e0c1b49fSNick Terrell /* end of transparent initialization stage */
5498e0c1b49fSNick Terrell
5499e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
5500e0c1b49fSNick Terrell /* compression stage */
5501e0c1b49fSNick Terrell FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
5502e0c1b49fSNick Terrell DEBUGLOG(5, "completed ZSTD_compressStream2");
5503e0c1b49fSNick Terrell ZSTD_setBufferExpectations(cctx, output, input);
5504e0c1b49fSNick Terrell return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
5505e0c1b49fSNick Terrell }
5506e0c1b49fSNick Terrell
/* ZSTD_compressStream2_simpleArgs() :
 * Same as ZSTD_compressStream2(), but the buffers are passed as plain
 * pointer / capacity / position arguments instead of descriptor structures.
 * @dstPos and @srcPos are read on entry and always written back on exit,
 * whether the call succeeded or not.
 * @return : the return value of ZSTD_compressStream2().
 */
size_t ZSTD_compressStream2_simpleArgs (
                            ZSTD_CCtx* cctx,
                            void* dst, size_t dstCapacity, size_t* dstPos,
                      const void* src, size_t srcSize, size_t* srcPos,
                            ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output;
    ZSTD_inBuffer  input;
    size_t status;

    output.dst  = dst;
    output.size = dstCapacity;
    output.pos  = *dstPos;

    input.src  = src;
    input.size = srcSize;
    input.pos  = *srcPos;

    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    status = ZSTD_compressStream2(cctx, &output, &input, endOp);

    *dstPos = output.pos;
    *srcPos = input.pos;
    return status;
}
5521e0c1b49fSNick Terrell
/* ZSTD_compress2() :
 * One-shot compression of @src into @dst using the parameters held by @cctx.
 * The session is reset, both buffer modes are temporarily forced to
 * ZSTD_bm_stable for the single ZSTD_e_end call, then restored to the values
 * they had on entry (also when the call fails).
 * @return : number of bytes written into @dst,
 *           dstSize_tooSmall when the frame could not be completed in @dst,
 *           or any error forwarded from the streaming call.
 */
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    ZSTD_bufferMode_e const savedInMode  = cctx->requestedParams.inBufferMode;
    ZSTD_bufferMode_e const savedOutMode = cctx->requestedParams.outBufferMode;
    size_t produced = 0;
    size_t consumed = 0;
    size_t remainingToFlush;

    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);

    /* Enable stable input/output buffers. */
    cctx->requestedParams.inBufferMode  = ZSTD_bm_stable;
    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;

    remainingToFlush = ZSTD_compressStream2_simpleArgs(cctx,
                                    dst, dstCapacity, &produced,
                                    src, srcSize, &consumed,
                                    ZSTD_e_end);

    /* Reset to the original values, before any early return. */
    cctx->requestedParams.inBufferMode  = savedInMode;
    cctx->requestedParams.outBufferMode = savedOutMode;

    FORWARD_IF_ERROR(remainingToFlush, "ZSTD_compressStream2_simpleArgs failed");
    if (remainingToFlush != 0) {  /* compression not completed, due to lack of output space */
        assert(produced == dstCapacity);
        RETURN_ERROR(dstSize_tooSmall, "");
    }
    assert(consumed == srcSize);   /* all input is expected consumed */
    return produced;
}
5551e0c1b49fSNick Terrell
5552e0c1b49fSNick Terrell typedef struct {
5553e0c1b49fSNick Terrell U32 idx; /* Index in array of ZSTD_Sequence */
5554e0c1b49fSNick Terrell U32 posInSequence; /* Position within sequence at idx */
5555e0c1b49fSNick Terrell size_t posInSrc; /* Number of bytes given by sequences provided so far */
5556e0c1b49fSNick Terrell } ZSTD_sequencePosition;
5557e0c1b49fSNick Terrell
5558*2aa14b1aSNick Terrell /* ZSTD_validateSequence() :
5559*2aa14b1aSNick Terrell * @offCode : is presumed to follow format required by ZSTD_storeSeq()
5560*2aa14b1aSNick Terrell * @returns a ZSTD error code if sequence is not valid
5561*2aa14b1aSNick Terrell */
5562*2aa14b1aSNick Terrell static size_t
ZSTD_validateSequence(U32 offCode,U32 matchLength,size_t posInSrc,U32 windowLog,size_t dictSize)5563*2aa14b1aSNick Terrell ZSTD_validateSequence(U32 offCode, U32 matchLength,
5564*2aa14b1aSNick Terrell size_t posInSrc, U32 windowLog, size_t dictSize)
5565*2aa14b1aSNick Terrell {
5566*2aa14b1aSNick Terrell U32 const windowSize = 1 << windowLog;
55677486f5c6SJilin Yuan /* posInSrc represents the amount of data the decoder would decode up to this point.
5568e0c1b49fSNick Terrell * As long as the amount of data decoded is less than or equal to window size, offsets may be
5569e0c1b49fSNick Terrell * larger than the total length of output decoded in order to reference the dict, even larger than
5570e0c1b49fSNick Terrell * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
5571e0c1b49fSNick Terrell */
5572*2aa14b1aSNick Terrell size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
5573*2aa14b1aSNick Terrell RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
5574*2aa14b1aSNick Terrell RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
5575e0c1b49fSNick Terrell return 0;
5576e0c1b49fSNick Terrell }
5577e0c1b49fSNick Terrell
5578e0c1b49fSNick Terrell /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
ZSTD_finalizeOffCode(U32 rawOffset,const U32 rep[ZSTD_REP_NUM],U32 ll0)5579*2aa14b1aSNick Terrell static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
5580*2aa14b1aSNick Terrell {
5581*2aa14b1aSNick Terrell U32 offCode = STORE_OFFSET(rawOffset);
5582e0c1b49fSNick Terrell
5583e0c1b49fSNick Terrell if (!ll0 && rawOffset == rep[0]) {
5584*2aa14b1aSNick Terrell offCode = STORE_REPCODE_1;
5585e0c1b49fSNick Terrell } else if (rawOffset == rep[1]) {
5586*2aa14b1aSNick Terrell offCode = STORE_REPCODE(2 - ll0);
5587e0c1b49fSNick Terrell } else if (rawOffset == rep[2]) {
5588*2aa14b1aSNick Terrell offCode = STORE_REPCODE(3 - ll0);
5589e0c1b49fSNick Terrell } else if (ll0 && rawOffset == rep[0] - 1) {
5590*2aa14b1aSNick Terrell offCode = STORE_REPCODE_3;
5591e0c1b49fSNick Terrell }
5592e0c1b49fSNick Terrell return offCode;
5593e0c1b49fSNick Terrell }
5594e0c1b49fSNick Terrell
5595e0c1b49fSNick Terrell /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
5596e0c1b49fSNick Terrell * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
5597e0c1b49fSNick Terrell */
5598*2aa14b1aSNick Terrell static size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx * cctx,ZSTD_sequencePosition * seqPos,const ZSTD_Sequence * const inSeqs,size_t inSeqsSize,const void * src,size_t blockSize)5599*2aa14b1aSNick Terrell ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
5600*2aa14b1aSNick Terrell ZSTD_sequencePosition* seqPos,
5601e0c1b49fSNick Terrell const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
5602*2aa14b1aSNick Terrell const void* src, size_t blockSize)
5603*2aa14b1aSNick Terrell {
5604e0c1b49fSNick Terrell U32 idx = seqPos->idx;
5605e0c1b49fSNick Terrell BYTE const* ip = (BYTE const*)(src);
5606e0c1b49fSNick Terrell const BYTE* const iend = ip + blockSize;
5607e0c1b49fSNick Terrell repcodes_t updatedRepcodes;
5608e0c1b49fSNick Terrell U32 dictSize;
5609e0c1b49fSNick Terrell
5610e0c1b49fSNick Terrell if (cctx->cdict) {
5611e0c1b49fSNick Terrell dictSize = (U32)cctx->cdict->dictContentSize;
5612e0c1b49fSNick Terrell } else if (cctx->prefixDict.dict) {
5613e0c1b49fSNick Terrell dictSize = (U32)cctx->prefixDict.dictSize;
5614e0c1b49fSNick Terrell } else {
5615e0c1b49fSNick Terrell dictSize = 0;
5616e0c1b49fSNick Terrell }
5617e0c1b49fSNick Terrell ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
5618e0c1b49fSNick Terrell for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
5619*2aa14b1aSNick Terrell U32 const litLength = inSeqs[idx].litLength;
5620*2aa14b1aSNick Terrell U32 const ll0 = (litLength == 0);
5621*2aa14b1aSNick Terrell U32 const matchLength = inSeqs[idx].matchLength;
5622*2aa14b1aSNick Terrell U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
5623*2aa14b1aSNick Terrell ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
5624e0c1b49fSNick Terrell
5625e0c1b49fSNick Terrell DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
5626e0c1b49fSNick Terrell if (cctx->appliedParams.validateSequences) {
5627e0c1b49fSNick Terrell seqPos->posInSrc += litLength + matchLength;
5628e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
5629*2aa14b1aSNick Terrell cctx->appliedParams.cParams.windowLog, dictSize),
5630e0c1b49fSNick Terrell "Sequence validation failed");
5631e0c1b49fSNick Terrell }
5632e0c1b49fSNick Terrell RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
5633e0c1b49fSNick Terrell "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5634*2aa14b1aSNick Terrell ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
5635e0c1b49fSNick Terrell ip += matchLength + litLength;
5636e0c1b49fSNick Terrell }
5637e0c1b49fSNick Terrell ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
5638e0c1b49fSNick Terrell
5639e0c1b49fSNick Terrell if (inSeqs[idx].litLength) {
5640e0c1b49fSNick Terrell DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
5641e0c1b49fSNick Terrell ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
5642e0c1b49fSNick Terrell ip += inSeqs[idx].litLength;
5643e0c1b49fSNick Terrell seqPos->posInSrc += inSeqs[idx].litLength;
5644e0c1b49fSNick Terrell }
5645e0c1b49fSNick Terrell RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
5646e0c1b49fSNick Terrell seqPos->idx = idx+1;
5647e0c1b49fSNick Terrell return 0;
5648e0c1b49fSNick Terrell }
5649e0c1b49fSNick Terrell
5650e0c1b49fSNick Terrell /* Returns the number of bytes to move the current read position back by. Only non-zero
5651e0c1b49fSNick Terrell * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
5652e0c1b49fSNick Terrell * went wrong.
5653e0c1b49fSNick Terrell *
5654e0c1b49fSNick Terrell * This function will attempt to scan through blockSize bytes represented by the sequences
5655e0c1b49fSNick Terrell * in inSeqs, storing any (partial) sequences.
5656e0c1b49fSNick Terrell *
5657e0c1b49fSNick Terrell * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
5658e0c1b49fSNick Terrell * avoid splitting a match, or to avoid splitting a match such that it would produce a match
5659e0c1b49fSNick Terrell * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
5660e0c1b49fSNick Terrell */
5661*2aa14b1aSNick Terrell static size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx * cctx,ZSTD_sequencePosition * seqPos,const ZSTD_Sequence * const inSeqs,size_t inSeqsSize,const void * src,size_t blockSize)5662*2aa14b1aSNick Terrell ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
5663e0c1b49fSNick Terrell const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
5664*2aa14b1aSNick Terrell const void* src, size_t blockSize)
5665*2aa14b1aSNick Terrell {
5666e0c1b49fSNick Terrell U32 idx = seqPos->idx;
5667e0c1b49fSNick Terrell U32 startPosInSequence = seqPos->posInSequence;
5668e0c1b49fSNick Terrell U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
5669e0c1b49fSNick Terrell size_t dictSize;
5670e0c1b49fSNick Terrell BYTE const* ip = (BYTE const*)(src);
5671e0c1b49fSNick Terrell BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
5672e0c1b49fSNick Terrell repcodes_t updatedRepcodes;
5673e0c1b49fSNick Terrell U32 bytesAdjustment = 0;
5674e0c1b49fSNick Terrell U32 finalMatchSplit = 0;
5675e0c1b49fSNick Terrell
5676e0c1b49fSNick Terrell if (cctx->cdict) {
5677e0c1b49fSNick Terrell dictSize = cctx->cdict->dictContentSize;
5678e0c1b49fSNick Terrell } else if (cctx->prefixDict.dict) {
5679e0c1b49fSNick Terrell dictSize = cctx->prefixDict.dictSize;
5680e0c1b49fSNick Terrell } else {
5681e0c1b49fSNick Terrell dictSize = 0;
5682e0c1b49fSNick Terrell }
5683e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
5684e0c1b49fSNick Terrell DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
5685e0c1b49fSNick Terrell ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
5686e0c1b49fSNick Terrell while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
5687e0c1b49fSNick Terrell const ZSTD_Sequence currSeq = inSeqs[idx];
5688*2aa14b1aSNick Terrell U32 litLength = currSeq.litLength;
5689*2aa14b1aSNick Terrell U32 matchLength = currSeq.matchLength;
5690*2aa14b1aSNick Terrell U32 const rawOffset = currSeq.offset;
5691*2aa14b1aSNick Terrell U32 offCode;
5692e0c1b49fSNick Terrell
5693e0c1b49fSNick Terrell /* Modify the sequence depending on where endPosInSequence lies */
5694e0c1b49fSNick Terrell if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
5695e0c1b49fSNick Terrell if (startPosInSequence >= litLength) {
5696e0c1b49fSNick Terrell startPosInSequence -= litLength;
5697e0c1b49fSNick Terrell litLength = 0;
5698e0c1b49fSNick Terrell matchLength -= startPosInSequence;
5699e0c1b49fSNick Terrell } else {
5700e0c1b49fSNick Terrell litLength -= startPosInSequence;
5701e0c1b49fSNick Terrell }
5702e0c1b49fSNick Terrell /* Move to the next sequence */
5703e0c1b49fSNick Terrell endPosInSequence -= currSeq.litLength + currSeq.matchLength;
5704e0c1b49fSNick Terrell startPosInSequence = 0;
5705e0c1b49fSNick Terrell idx++;
5706e0c1b49fSNick Terrell } else {
5707e0c1b49fSNick Terrell /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
5708e0c1b49fSNick Terrell does not reach the end of the match. So, we have to split the sequence */
5709e0c1b49fSNick Terrell DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
5710e0c1b49fSNick Terrell currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
5711e0c1b49fSNick Terrell if (endPosInSequence > litLength) {
5712e0c1b49fSNick Terrell U32 firstHalfMatchLength;
5713e0c1b49fSNick Terrell litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
5714e0c1b49fSNick Terrell firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
5715e0c1b49fSNick Terrell if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
5716e0c1b49fSNick Terrell /* Only ever split the match if it is larger than the block size */
5717e0c1b49fSNick Terrell U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
5718e0c1b49fSNick Terrell if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
5719e0c1b49fSNick Terrell /* Move the endPosInSequence backward so that it creates match of minMatch length */
5720e0c1b49fSNick Terrell endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
5721e0c1b49fSNick Terrell bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
5722e0c1b49fSNick Terrell firstHalfMatchLength -= bytesAdjustment;
5723e0c1b49fSNick Terrell }
5724e0c1b49fSNick Terrell matchLength = firstHalfMatchLength;
5725e0c1b49fSNick Terrell /* Flag that we split the last match - after storing the sequence, exit the loop,
5726e0c1b49fSNick Terrell but keep the value of endPosInSequence */
5727e0c1b49fSNick Terrell finalMatchSplit = 1;
5728e0c1b49fSNick Terrell } else {
5729e0c1b49fSNick Terrell /* Move the position in sequence backwards so that we don't split match, and break to store
5730e0c1b49fSNick Terrell * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
5731e0c1b49fSNick Terrell * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
5732e0c1b49fSNick Terrell * would cause the first half of the match to be too small
5733e0c1b49fSNick Terrell */
5734e0c1b49fSNick Terrell bytesAdjustment = endPosInSequence - currSeq.litLength;
5735e0c1b49fSNick Terrell endPosInSequence = currSeq.litLength;
5736e0c1b49fSNick Terrell break;
5737e0c1b49fSNick Terrell }
5738e0c1b49fSNick Terrell } else {
5739e0c1b49fSNick Terrell /* This sequence ends inside the literals, break to store the last literals */
5740e0c1b49fSNick Terrell break;
5741e0c1b49fSNick Terrell }
5742e0c1b49fSNick Terrell }
5743e0c1b49fSNick Terrell /* Check if this offset can be represented with a repcode */
5744*2aa14b1aSNick Terrell { U32 const ll0 = (litLength == 0);
5745e0c1b49fSNick Terrell offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
5746*2aa14b1aSNick Terrell ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
5747e0c1b49fSNick Terrell }
5748e0c1b49fSNick Terrell
5749e0c1b49fSNick Terrell if (cctx->appliedParams.validateSequences) {
5750e0c1b49fSNick Terrell seqPos->posInSrc += litLength + matchLength;
5751e0c1b49fSNick Terrell FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
5752*2aa14b1aSNick Terrell cctx->appliedParams.cParams.windowLog, dictSize),
5753e0c1b49fSNick Terrell "Sequence validation failed");
5754e0c1b49fSNick Terrell }
5755e0c1b49fSNick Terrell DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
5756e0c1b49fSNick Terrell RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
5757e0c1b49fSNick Terrell "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
5758*2aa14b1aSNick Terrell ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
5759e0c1b49fSNick Terrell ip += matchLength + litLength;
5760e0c1b49fSNick Terrell }
5761e0c1b49fSNick Terrell DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
5762e0c1b49fSNick Terrell assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
5763e0c1b49fSNick Terrell seqPos->idx = idx;
5764e0c1b49fSNick Terrell seqPos->posInSequence = endPosInSequence;
5765e0c1b49fSNick Terrell ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
5766e0c1b49fSNick Terrell
5767e0c1b49fSNick Terrell iend -= bytesAdjustment;
5768e0c1b49fSNick Terrell if (ip != iend) {
5769e0c1b49fSNick Terrell /* Store any last literals */
5770e0c1b49fSNick Terrell U32 lastLLSize = (U32)(iend - ip);
5771e0c1b49fSNick Terrell assert(ip <= iend);
5772e0c1b49fSNick Terrell DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
5773e0c1b49fSNick Terrell ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
5774e0c1b49fSNick Terrell seqPos->posInSrc += lastLLSize;
5775e0c1b49fSNick Terrell }
5776e0c1b49fSNick Terrell
5777e0c1b49fSNick Terrell return bytesAdjustment;
5778e0c1b49fSNick Terrell }
5779e0c1b49fSNick Terrell
5780e0c1b49fSNick Terrell typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
5781e0c1b49fSNick Terrell const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
5782e0c1b49fSNick Terrell const void* src, size_t blockSize);
ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)5783*2aa14b1aSNick Terrell static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
5784*2aa14b1aSNick Terrell {
5785e0c1b49fSNick Terrell ZSTD_sequenceCopier sequenceCopier = NULL;
5786e0c1b49fSNick Terrell assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
5787e0c1b49fSNick Terrell if (mode == ZSTD_sf_explicitBlockDelimiters) {
5788e0c1b49fSNick Terrell return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
5789e0c1b49fSNick Terrell } else if (mode == ZSTD_sf_noBlockDelimiters) {
5790e0c1b49fSNick Terrell return ZSTD_copySequencesToSeqStoreNoBlockDelim;
5791e0c1b49fSNick Terrell }
5792e0c1b49fSNick Terrell assert(sequenceCopier != NULL);
5793e0c1b49fSNick Terrell return sequenceCopier;
5794e0c1b49fSNick Terrell }
5795e0c1b49fSNick Terrell
5796e0c1b49fSNick Terrell /* Compress, block-by-block, all of the sequences given.
5797e0c1b49fSNick Terrell *
5798*2aa14b1aSNick Terrell * Returns the cumulative size of all compressed blocks (including their headers),
5799*2aa14b1aSNick Terrell * otherwise a ZSTD error.
5800e0c1b49fSNick Terrell */
5801*2aa14b1aSNick Terrell static size_t
ZSTD_compressSequences_internal(ZSTD_CCtx * cctx,void * dst,size_t dstCapacity,const ZSTD_Sequence * inSeqs,size_t inSeqsSize,const void * src,size_t srcSize)5802*2aa14b1aSNick Terrell ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
5803e0c1b49fSNick Terrell void* dst, size_t dstCapacity,
5804e0c1b49fSNick Terrell const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
5805*2aa14b1aSNick Terrell const void* src, size_t srcSize)
5806*2aa14b1aSNick Terrell {
5807e0c1b49fSNick Terrell size_t cSize = 0;
5808e0c1b49fSNick Terrell U32 lastBlock;
5809e0c1b49fSNick Terrell size_t blockSize;
5810e0c1b49fSNick Terrell size_t compressedSeqsSize;
5811e0c1b49fSNick Terrell size_t remaining = srcSize;
5812e0c1b49fSNick Terrell ZSTD_sequencePosition seqPos = {0, 0, 0};
5813e0c1b49fSNick Terrell
5814e0c1b49fSNick Terrell BYTE const* ip = (BYTE const*)src;
5815e0c1b49fSNick Terrell BYTE* op = (BYTE*)dst;
5816*2aa14b1aSNick Terrell ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
5817e0c1b49fSNick Terrell
5818e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
5819e0c1b49fSNick Terrell /* Special case: empty frame */
5820e0c1b49fSNick Terrell if (remaining == 0) {
5821e0c1b49fSNick Terrell U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
5822e0c1b49fSNick Terrell RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
5823e0c1b49fSNick Terrell MEM_writeLE32(op, cBlockHeader24);
5824e0c1b49fSNick Terrell op += ZSTD_blockHeaderSize;
5825e0c1b49fSNick Terrell dstCapacity -= ZSTD_blockHeaderSize;
5826e0c1b49fSNick Terrell cSize += ZSTD_blockHeaderSize;
5827e0c1b49fSNick Terrell }
5828e0c1b49fSNick Terrell
5829e0c1b49fSNick Terrell while (remaining) {
5830e0c1b49fSNick Terrell size_t cBlockSize;
5831e0c1b49fSNick Terrell size_t additionalByteAdjustment;
5832e0c1b49fSNick Terrell lastBlock = remaining <= cctx->blockSize;
5833e0c1b49fSNick Terrell blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
5834e0c1b49fSNick Terrell ZSTD_resetSeqStore(&cctx->seqStore);
5835e0c1b49fSNick Terrell DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
5836e0c1b49fSNick Terrell
5837e0c1b49fSNick Terrell additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
5838e0c1b49fSNick Terrell FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
5839e0c1b49fSNick Terrell blockSize -= additionalByteAdjustment;
5840e0c1b49fSNick Terrell
5841e0c1b49fSNick Terrell /* If blocks are too small, emit as a nocompress block */
5842e0c1b49fSNick Terrell if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
5843e0c1b49fSNick Terrell cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
5844e0c1b49fSNick Terrell FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
5845e0c1b49fSNick Terrell DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
5846e0c1b49fSNick Terrell cSize += cBlockSize;
5847e0c1b49fSNick Terrell ip += blockSize;
5848e0c1b49fSNick Terrell op += cBlockSize;
5849e0c1b49fSNick Terrell remaining -= blockSize;
5850e0c1b49fSNick Terrell dstCapacity -= cBlockSize;
5851e0c1b49fSNick Terrell continue;
5852e0c1b49fSNick Terrell }
5853e0c1b49fSNick Terrell
5854*2aa14b1aSNick Terrell compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
5855e0c1b49fSNick Terrell &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
5856e0c1b49fSNick Terrell &cctx->appliedParams,
5857e0c1b49fSNick Terrell op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
5858e0c1b49fSNick Terrell blockSize,
5859e0c1b49fSNick Terrell cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
5860e0c1b49fSNick Terrell cctx->bmi2);
5861e0c1b49fSNick Terrell FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
5862e0c1b49fSNick Terrell DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
5863e0c1b49fSNick Terrell
5864e0c1b49fSNick Terrell if (!cctx->isFirstBlock &&
5865e0c1b49fSNick Terrell ZSTD_maybeRLE(&cctx->seqStore) &&
5866e0c1b49fSNick Terrell ZSTD_isRLE((BYTE const*)src, srcSize)) {
5867e0c1b49fSNick Terrell /* We don't want to emit our first block as a RLE even if it qualifies because
5868e0c1b49fSNick Terrell * doing so will cause the decoder (cli only) to throw a "should consume all input error."
5869e0c1b49fSNick Terrell * This is only an issue for zstd <= v1.4.3
5870e0c1b49fSNick Terrell */
5871e0c1b49fSNick Terrell compressedSeqsSize = 1;
5872e0c1b49fSNick Terrell }
5873e0c1b49fSNick Terrell
5874e0c1b49fSNick Terrell if (compressedSeqsSize == 0) {
5875e0c1b49fSNick Terrell /* ZSTD_noCompressBlock writes the block header as well */
5876e0c1b49fSNick Terrell cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
5877e0c1b49fSNick Terrell FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
5878e0c1b49fSNick Terrell DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
5879e0c1b49fSNick Terrell } else if (compressedSeqsSize == 1) {
5880e0c1b49fSNick Terrell cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
5881e0c1b49fSNick Terrell FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
5882e0c1b49fSNick Terrell DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
5883e0c1b49fSNick Terrell } else {
5884e0c1b49fSNick Terrell U32 cBlockHeader;
5885e0c1b49fSNick Terrell /* Error checking and repcodes update */
5886*2aa14b1aSNick Terrell ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
5887e0c1b49fSNick Terrell if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
5888e0c1b49fSNick Terrell cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
5889e0c1b49fSNick Terrell
5890e0c1b49fSNick Terrell /* Write block header into beginning of block*/
5891e0c1b49fSNick Terrell cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
5892e0c1b49fSNick Terrell MEM_writeLE24(op, cBlockHeader);
5893e0c1b49fSNick Terrell cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
5894e0c1b49fSNick Terrell DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
5895e0c1b49fSNick Terrell }
5896e0c1b49fSNick Terrell
5897e0c1b49fSNick Terrell cSize += cBlockSize;
5898e0c1b49fSNick Terrell DEBUGLOG(4, "cSize running total: %zu", cSize);
5899e0c1b49fSNick Terrell
5900e0c1b49fSNick Terrell if (lastBlock) {
5901e0c1b49fSNick Terrell break;
5902e0c1b49fSNick Terrell } else {
5903e0c1b49fSNick Terrell ip += blockSize;
5904e0c1b49fSNick Terrell op += cBlockSize;
5905e0c1b49fSNick Terrell remaining -= blockSize;
5906e0c1b49fSNick Terrell dstCapacity -= cBlockSize;
5907e0c1b49fSNick Terrell cctx->isFirstBlock = 0;
5908e0c1b49fSNick Terrell }
5909e0c1b49fSNick Terrell }
5910e0c1b49fSNick Terrell
5911e0c1b49fSNick Terrell return cSize;
5912e0c1b49fSNick Terrell }
5913e0c1b49fSNick Terrell
/*! ZSTD_compressSequences() :
 *  Builds a complete frame in `dst` from `src` and the caller-provided sequences:
 *  frame header, then the blocks produced by ZSTD_compressSequences_internal(),
 *  then a 4-byte checksum when appliedParams.fParams.checksumFlag is set.
 * @return : total number of bytes written into `dst`, or a ZSTD error code. */
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    /* The result must be validated before use: an error code added to `op`
     * or subtracted from `dstCapacity` would corrupt every later bound check. */
    FORWARD_IF_ERROR(frameHeaderSize, "Writing frame header failed");
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        xxh64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        /* only the low 32 bits of the 64-bit digest are stored */
        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}
5955e0c1b49fSNick Terrell
5956e0c1b49fSNick Terrell /*====== Finalize ======*/
5957e0c1b49fSNick Terrell
5958e0c1b49fSNick Terrell /*! ZSTD_flushStream() :
5959e0c1b49fSNick Terrell * @return : amount of data remaining to flush */
ZSTD_flushStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output)5960e0c1b49fSNick Terrell size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
5961e0c1b49fSNick Terrell {
5962e0c1b49fSNick Terrell ZSTD_inBuffer input = { NULL, 0, 0 };
5963e0c1b49fSNick Terrell return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
5964e0c1b49fSNick Terrell }
5965e0c1b49fSNick Terrell
5966e0c1b49fSNick Terrell
ZSTD_endStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output)5967e0c1b49fSNick Terrell size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
5968e0c1b49fSNick Terrell {
5969e0c1b49fSNick Terrell ZSTD_inBuffer input = { NULL, 0, 0 };
5970e0c1b49fSNick Terrell size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
5971e0c1b49fSNick Terrell FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
5972e0c1b49fSNick Terrell if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
5973e0c1b49fSNick Terrell /* single thread mode : attempt to calculate remaining to flush more precisely */
5974e0c1b49fSNick Terrell { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
5975e0c1b49fSNick Terrell size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
5976e0c1b49fSNick Terrell size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
5977e0c1b49fSNick Terrell DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
5978e0c1b49fSNick Terrell return toFlush;
5979e0c1b49fSNick Terrell }
5980e0c1b49fSNick Terrell }
5981e0c1b49fSNick Terrell
5982e0c1b49fSNick Terrell
5983e0c1b49fSNick Terrell /*-===== Pre-defined compression levels =====-*/
5984*2aa14b1aSNick Terrell #include "clevels.h"
5985e0c1b49fSNick Terrell
ZSTD_maxCLevel(void)5986e0c1b49fSNick Terrell int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
ZSTD_minCLevel(void)5987e0c1b49fSNick Terrell int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
ZSTD_defaultCLevel(void)5988*2aa14b1aSNick Terrell int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
5989e0c1b49fSNick Terrell
ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel,size_t const dictSize)5990e0c1b49fSNick Terrell static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
5991e0c1b49fSNick Terrell {
5992e0c1b49fSNick Terrell ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
5993e0c1b49fSNick Terrell switch (cParams.strategy) {
5994e0c1b49fSNick Terrell case ZSTD_fast:
5995e0c1b49fSNick Terrell case ZSTD_dfast:
5996e0c1b49fSNick Terrell break;
5997e0c1b49fSNick Terrell case ZSTD_greedy:
5998e0c1b49fSNick Terrell case ZSTD_lazy:
5999e0c1b49fSNick Terrell case ZSTD_lazy2:
6000e0c1b49fSNick Terrell cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
6001e0c1b49fSNick Terrell break;
6002e0c1b49fSNick Terrell case ZSTD_btlazy2:
6003e0c1b49fSNick Terrell case ZSTD_btopt:
6004e0c1b49fSNick Terrell case ZSTD_btultra:
6005e0c1b49fSNick Terrell case ZSTD_btultra2:
6006e0c1b49fSNick Terrell break;
6007e0c1b49fSNick Terrell }
6008e0c1b49fSNick Terrell return cParams;
6009e0c1b49fSNick Terrell }
6010e0c1b49fSNick Terrell
/* ZSTD_dedicatedDictSearch_isSupported() :
 * @return : 1 when the strategy lies in [ZSTD_greedy, ZSTD_lazy2],
 *           hashLog is strictly larger than chainLog, and chainLog <= 24;
 *           0 otherwise. */
static int ZSTD_dedicatedDictSearch_isSupported(
        ZSTD_compressionParameters const* cParams)
{
    if (cParams->strategy < ZSTD_greedy) return 0;
    if (cParams->strategy > ZSTD_lazy2) return 0;
    if (cParams->hashLog <= cParams->chainLog) return 0;
    return (cParams->chainLog <= 24);
}
6019e0c1b49fSNick Terrell
6020e0c1b49fSNick Terrell /*
6021e0c1b49fSNick Terrell * Reverses the adjustment applied to cparams when enabling dedicated dict
6022e0c1b49fSNick Terrell * search. This is used to recover the params set to be used in the working
6023e0c1b49fSNick Terrell * context. (Otherwise, those tables would also grow.)
6024e0c1b49fSNick Terrell */
ZSTD_dedicatedDictSearch_revertCParams(ZSTD_compressionParameters * cParams)6025e0c1b49fSNick Terrell static void ZSTD_dedicatedDictSearch_revertCParams(
6026e0c1b49fSNick Terrell ZSTD_compressionParameters* cParams) {
6027e0c1b49fSNick Terrell switch (cParams->strategy) {
6028e0c1b49fSNick Terrell case ZSTD_fast:
6029e0c1b49fSNick Terrell case ZSTD_dfast:
6030e0c1b49fSNick Terrell break;
6031e0c1b49fSNick Terrell case ZSTD_greedy:
6032e0c1b49fSNick Terrell case ZSTD_lazy:
6033e0c1b49fSNick Terrell case ZSTD_lazy2:
6034e0c1b49fSNick Terrell cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
6035*2aa14b1aSNick Terrell if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
6036*2aa14b1aSNick Terrell cParams->hashLog = ZSTD_HASHLOG_MIN;
6037*2aa14b1aSNick Terrell }
6038e0c1b49fSNick Terrell break;
6039e0c1b49fSNick Terrell case ZSTD_btlazy2:
6040e0c1b49fSNick Terrell case ZSTD_btopt:
6041e0c1b49fSNick Terrell case ZSTD_btultra:
6042e0c1b49fSNick Terrell case ZSTD_btultra2:
6043e0c1b49fSNick Terrell break;
6044e0c1b49fSNick Terrell }
6045e0c1b49fSNick Terrell }
6046e0c1b49fSNick Terrell
/* ZSTD_getCParamRowSize() :
 * Computes the size used to select a row set in the default parameters table.
 * In ZSTD_cpm_attachDict mode the dictionary size is ignored (treated as 0).
 * @return : ZSTD_CONTENTSIZE_UNKNOWN when the source size is unknown and there is
 *           no dictionary; srcSizeHint + dictSize when the source size is known;
 *           srcSizeHint + dictSize + 500 when the source size is unknown but a
 *           dictionary is present. */
static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    switch (mode) {
    case ZSTD_cpm_attachDict:
        dictSize = 0;
        break;
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
    case ZSTD_cpm_createCDict:
        break;
    default:
        assert(0);
        break;
    }
    if (srcSizeHint != ZSTD_CONTENTSIZE_UNKNOWN) {
        return srcSizeHint + dictSize;
    }
    if (dictSize == 0) {
        return ZSTD_CONTENTSIZE_UNKNOWN;
    }
    /* unknown source size with a dictionary: same unsigned arithmetic as a known size, plus 500 */
    return srcSizeHint + dictSize + 500;
}
6066e0c1b49fSNick Terrell
6067e0c1b49fSNick Terrell /*! ZSTD_getCParams_internal() :
6068e0c1b49fSNick Terrell * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
6069e0c1b49fSNick Terrell * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
6070e0c1b49fSNick Terrell * Use dictSize == 0 for unknown or unused.
6071e0c1b49fSNick Terrell * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
ZSTD_getCParams_internal(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)6072e0c1b49fSNick Terrell static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
6073e0c1b49fSNick Terrell {
6074e0c1b49fSNick Terrell U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
6075e0c1b49fSNick Terrell U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
6076e0c1b49fSNick Terrell int row;
6077e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
6078e0c1b49fSNick Terrell
6079e0c1b49fSNick Terrell /* row */
6080e0c1b49fSNick Terrell if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
6081e0c1b49fSNick Terrell else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
6082e0c1b49fSNick Terrell else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
6083e0c1b49fSNick Terrell else row = compressionLevel;
6084e0c1b49fSNick Terrell
6085e0c1b49fSNick Terrell { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
6086*2aa14b1aSNick Terrell DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
6087e0c1b49fSNick Terrell /* acceleration factor */
6088e0c1b49fSNick Terrell if (compressionLevel < 0) {
6089e0c1b49fSNick Terrell int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
6090e0c1b49fSNick Terrell cp.targetLength = (unsigned)(-clampedCompressionLevel);
6091e0c1b49fSNick Terrell }
6092e0c1b49fSNick Terrell /* refine parameters based on srcSize & dictSize */
6093e0c1b49fSNick Terrell return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
6094e0c1b49fSNick Terrell }
6095e0c1b49fSNick Terrell }
6096e0c1b49fSNick Terrell
6097e0c1b49fSNick Terrell /*! ZSTD_getCParams() :
6098e0c1b49fSNick Terrell * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
6099e0c1b49fSNick Terrell * Size values are optional, provide 0 if not known or unused */
ZSTD_getCParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)6100e0c1b49fSNick Terrell ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
6101e0c1b49fSNick Terrell {
6102e0c1b49fSNick Terrell if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
6103e0c1b49fSNick Terrell return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
6104e0c1b49fSNick Terrell }
6105e0c1b49fSNick Terrell
6106e0c1b49fSNick Terrell /*! ZSTD_getParams() :
6107e0c1b49fSNick Terrell * same idea as ZSTD_getCParams()
6108e0c1b49fSNick Terrell * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
6109e0c1b49fSNick Terrell * Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_getParams_internal(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize,ZSTD_cParamMode_e mode)6110e0c1b49fSNick Terrell static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
6111e0c1b49fSNick Terrell ZSTD_parameters params;
6112e0c1b49fSNick Terrell ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
6113e0c1b49fSNick Terrell DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
6114e0c1b49fSNick Terrell ZSTD_memset(¶ms, 0, sizeof(params));
6115e0c1b49fSNick Terrell params.cParams = cParams;
6116e0c1b49fSNick Terrell params.fParams.contentSizeFlag = 1;
6117e0c1b49fSNick Terrell return params;
6118e0c1b49fSNick Terrell }
6119e0c1b49fSNick Terrell
6120e0c1b49fSNick Terrell /*! ZSTD_getParams() :
6121e0c1b49fSNick Terrell * same idea as ZSTD_getCParams()
6122e0c1b49fSNick Terrell * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
6123e0c1b49fSNick Terrell * Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_getParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)6124e0c1b49fSNick Terrell ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
6125e0c1b49fSNick Terrell if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
6126e0c1b49fSNick Terrell return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
6127e0c1b49fSNick Terrell }
6128