1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /*
3 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * All rights reserved.
5 *
6 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * in the COPYING file in the root directory of this source tree).
9 * You may select, at your option, one of the above-listed licenses.
10 */
11
12 /* zstd_ddict.c :
13 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
14
15 /*-*******************************************************
16 * Dependencies
17 *********************************************************/
18 #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
19 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
20 #include "../common/cpu.h" /* bmi2 */
21 #include "../common/mem.h" /* low level memory routines */
22 #define FSE_STATIC_LINKING_ONLY
23 #include "../common/fse.h"
24 #include "../common/huf.h"
25 #include "zstd_decompress_internal.h"
26 #include "zstd_ddict.h"
27
28
29
30
31 /*-*******************************************************
32 * Types
33 *********************************************************/
34 struct ZSTD_DDict_s {
35 void* dictBuffer;
36 const void* dictContent;
37 size_t dictSize;
38 ZSTD_entropyDTables_t entropy;
39 U32 dictID;
40 U32 entropyPresent;
41 ZSTD_customMem cMem;
42 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
43
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)44 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
45 {
46 assert(ddict != NULL);
47 return ddict->dictContent;
48 }
49
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)50 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
51 {
52 assert(ddict != NULL);
53 return ddict->dictSize;
54 }
55
/* ZSTD_copyDDictParameters() :
 * Makes `dctx` reference the dictionary stored in `ddict` :
 * - dictionary ID and content boundaries are copied into `dctx`,
 * - when `ddict` carries entropy tables, `dctx` table pointers are set to them
 *   and the repeat offsets are copied; otherwise the entropy flags are cleared.
 * Neither `dctx` nor `ddict` may be NULL. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and content window */
    {   const void* const dictStart = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = (const BYTE*)dictStart + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* no entropy tables in this dictionary : just clear the flags */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* entropy tables are referenced (not copied); repeat offsets are copied */
    {   const ZSTD_entropyDTables_t* const tables = &ddict->entropy;
        dctx->litEntropy = 1;
        dctx->fseEntropy = 1;
        dctx->LLTptr = tables->LLTable;
        dctx->MLTptr = tables->MLTable;
        dctx->OFTptr = tables->OFTable;
        dctx->HUFptr = tables->hufTable;
        dctx->entropy.rep[0] = tables->rep[0];
        dctx->entropy.rep[1] = tables->rep[1];
        dctx->entropy.rep[2] = tables->rep[2];
    }
}
85
86
87 static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)88 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
89 ZSTD_dictContentType_e dictContentType)
90 {
91 ddict->dictID = 0;
92 ddict->entropyPresent = 0;
93 if (dictContentType == ZSTD_dct_rawContent) return 0;
94
95 if (ddict->dictSize < 8) {
96 if (dictContentType == ZSTD_dct_fullDict)
97 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
98 return 0; /* pure content mode */
99 }
100 { U32 const magic = MEM_readLE32(ddict->dictContent);
101 if (magic != ZSTD_MAGIC_DICTIONARY) {
102 if (dictContentType == ZSTD_dct_fullDict)
103 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
104 return 0; /* pure content mode */
105 }
106 }
107 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
108
109 /* load entropy tables */
110 RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
111 &ddict->entropy, ddict->dictContent, ddict->dictSize)),
112 dictionary_corrupted, "");
113 ddict->entropyPresent = 1;
114 return 0;
115 }
116
117
/* ZSTD_initDDict_internal() :
 * Attaches `dict` to `ddict`, then parses it.
 * The content is duplicated with `ddict->cMem` only when a copy is requested
 * and there is something to copy (`dict` non-NULL and `dictSize` non-zero);
 * otherwise `dict` is simply referenced and `dictBuffer` stays NULL.
 * `ddict->cMem` must be set before calling.
 * @return : 0 on success, or an error code (memory_allocation, or any error
 *           forwarded from ZSTD_loadEntropy_intoDDict()). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const copyContent = (dictLoadMethod != ZSTD_dlm_byRef)
                         && (dict != NULL)
                         && (dictSize != 0);

    if (copyContent) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* fields are set before the NULL check, so that the object is in a
         * consistent state even when returning an error */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;   /* no content : ignore the provided size */
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);   /* cover both little and big endian */

    /* parse dictionary content */
    {   size_t const loadStatus = ZSTD_loadEntropy_intoDDict(ddict, dictContentType);
        FORWARD_IF_ERROR(loadStatus, "");
    }

    return 0;
}
142
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)143 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
144 ZSTD_dictLoadMethod_e dictLoadMethod,
145 ZSTD_dictContentType_e dictContentType,
146 ZSTD_customMem customMem)
147 {
148 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
149
150 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
151 if (ddict == NULL) return NULL;
152 ddict->cMem = customMem;
153 { size_t const initResult = ZSTD_initDDict_internal(ddict,
154 dict, dictSize,
155 dictLoadMethod, dictContentType);
156 if (ZSTD_isError(initResult)) {
157 ZSTD_freeDDict(ddict);
158 return NULL;
159 } }
160 return ddict;
161 }
162 }
163
164 /*! ZSTD_createDDict() :
165 * Create a digested dictionary, to start decompression without startup delay.
166 * `dict` content is copied inside DDict.
167 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)168 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
169 {
170 ZSTD_customMem const allocator = { NULL, NULL, NULL };
171 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
172 }
173
174 /*! ZSTD_createDDict_byReference() :
175 * Create a digested dictionary, to start decompression without startup delay.
176 * Dictionary content is simply referenced, it will be accessed during decompression.
177 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)178 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
179 {
180 ZSTD_customMem const allocator = { NULL, NULL, NULL };
181 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
182 }
183
184
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)185 const ZSTD_DDict* ZSTD_initStaticDDict(
186 void* sBuffer, size_t sBufferSize,
187 const void* dict, size_t dictSize,
188 ZSTD_dictLoadMethod_e dictLoadMethod,
189 ZSTD_dictContentType_e dictContentType)
190 {
191 size_t const neededSpace = sizeof(ZSTD_DDict)
192 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
193 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
194 assert(sBuffer != NULL);
195 assert(dict != NULL);
196 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
197 if (sBufferSize < neededSpace) return NULL;
198 if (dictLoadMethod == ZSTD_dlm_byCopy) {
199 ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
200 dict = ddict+1;
201 }
202 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
203 dict, dictSize,
204 ZSTD_dlm_byRef, dictContentType) ))
205 return NULL;
206 return ddict;
207 }
208
209
/*! ZSTD_freeDDict() :
 *  Releases `ddict` and the internal copy of the dictionary it may own,
 *  using the allocator stored in the object.
 *  Accepts NULL, in which case nothing happens.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* copy the allocator out first : `ddict` itself is released below */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;
}
219
220 /*! ZSTD_estimateDDictSize() :
221 * Estimate amount of memory that will be needed to create a dictionary for decompression.
222 * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)223 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
224 {
225 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
226 }
227
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)228 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
229 {
230 if (ddict==NULL) return 0; /* support sizeof on NULL */
231 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
232 }
233
234 /*! ZSTD_getDictID_fromDDict() :
235 * Provides the dictID of the dictionary loaded into `ddict`.
236 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
237 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)238 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
239 {
240 if (ddict==NULL) return 0;
241 return ddict->dictID;
242 }
243