1 /*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* zstd_ddict.c :
12 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13
14 /*-*******************************************************
15 * Dependencies
16 *********************************************************/
17 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18 #include "../common/cpu.h" /* bmi2 */
19 #include "../common/mem.h" /* low level memory routines */
20 #define FSE_STATIC_LINKING_ONLY
21 #include "../common/fse.h"
22 #define HUF_STATIC_LINKING_ONLY
23 #include "../common/huf.h"
24 #include "zstd_decompress_internal.h"
25 #include "zstd_ddict.h"
26
27
28
29
30 /*-*******************************************************
31 * Types
32 *********************************************************/
33 struct ZSTD_DDict_s {
34 void* dictBuffer;
35 const void* dictContent;
36 size_t dictSize;
37 ZSTD_entropyDTables_t entropy;
38 U32 dictID;
39 U32 entropyPresent;
40 ZSTD_customMem cMem;
41 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
42
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)43 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
44 {
45 assert(ddict != NULL);
46 return ddict->dictContent;
47 }
48
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)49 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
50 {
51 assert(ddict != NULL);
52 return ddict->dictSize;
53 }
54
/* ZSTD_copyDDictParameters() :
 * Make `dctx` use the dictionary described by `ddict` :
 * dictID, content boundaries and, when available, entropy tables and repcodes.
 * Both pointers must be non-NULL (checked by assert only).
 * Note : table pointers reference memory inside `ddict`, nothing is duplicated here. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and content window */
    {   const void* const content = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = content;
        dctx->virtualStart = content;
        dctx->dictEnd = (const BYTE*)content + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* content-only dictionary : no entropy to inherit */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* structured dictionary : point at the DDict's tables and copy its repcodes */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
        }
    }
}
84
85
86 static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)87 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
88 ZSTD_dictContentType_e dictContentType)
89 {
90 ddict->dictID = 0;
91 ddict->entropyPresent = 0;
92 if (dictContentType == ZSTD_dct_rawContent) return 0;
93
94 if (ddict->dictSize < 8) {
95 if (dictContentType == ZSTD_dct_fullDict)
96 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
97 return 0; /* pure content mode */
98 }
99 { U32 const magic = MEM_readLE32(ddict->dictContent);
100 if (magic != ZSTD_MAGIC_DICTIONARY) {
101 if (dictContentType == ZSTD_dct_fullDict)
102 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
103 return 0; /* pure content mode */
104 }
105 }
106 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
107
108 /* load entropy tables */
109 RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
110 &ddict->entropy, ddict->dictContent, ddict->dictSize)),
111 dictionary_corrupted, "");
112 ddict->entropyPresent = 1;
113 return 0;
114 }
115
116
/* ZSTD_initDDict_internal() :
 * Fill an already allocated `ddict` (with ddict->cMem already set) from `dict`.
 * The content is duplicated only when loading by copy with a non-empty dictionary;
 * otherwise it is referenced and no internal buffer is kept.
 * @return : 0 on success, or an error code
 *           (memory_allocation, or whatever header parsing reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dict != NULL)
                      && (dictSize != 0);

    if (mustCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* fields are set before the NULL check, so the caller's cleanup sees a defined dictBuffer */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;   /* no content : ignore the announced size */
    }
    ddict->dictSize = dictSize;

    /* multiplying by 0x1000001 repeats HufLog in the lowest and highest byte
     * (as long as it fits in one byte) : cover both little and big endian */
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
141
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)142 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
143 ZSTD_dictLoadMethod_e dictLoadMethod,
144 ZSTD_dictContentType_e dictContentType,
145 ZSTD_customMem customMem)
146 {
147 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
148
149 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
150 if (ddict == NULL) return NULL;
151 ddict->cMem = customMem;
152 { size_t const initResult = ZSTD_initDDict_internal(ddict,
153 dict, dictSize,
154 dictLoadMethod, dictContentType);
155 if (ZSTD_isError(initResult)) {
156 ZSTD_freeDDict(ddict);
157 return NULL;
158 } }
159 return ddict;
160 }
161 }
162
163 /*! ZSTD_createDDict() :
164 * Create a digested dictionary, to start decompression without startup delay.
165 * `dict` content is copied inside DDict.
166 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)167 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
168 {
169 ZSTD_customMem const allocator = { NULL, NULL, NULL };
170 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
171 }
172
173 /*! ZSTD_createDDict_byReference() :
174 * Create a digested dictionary, to start decompression without startup delay.
175 * Dictionary content is simply referenced, it will be accessed during decompression.
176 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)177 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
178 {
179 ZSTD_customMem const allocator = { NULL, NULL, NULL };
180 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
181 }
182
183
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)184 const ZSTD_DDict* ZSTD_initStaticDDict(
185 void* sBuffer, size_t sBufferSize,
186 const void* dict, size_t dictSize,
187 ZSTD_dictLoadMethod_e dictLoadMethod,
188 ZSTD_dictContentType_e dictContentType)
189 {
190 size_t const neededSpace = sizeof(ZSTD_DDict)
191 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
192 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
193 assert(sBuffer != NULL);
194 assert(dict != NULL);
195 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
196 if (sBufferSize < neededSpace) return NULL;
197 if (dictLoadMethod == ZSTD_dlm_byCopy) {
198 ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
199 dict = ddict+1;
200 }
201 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
202 dict, dictSize,
203 ZSTD_dlm_byRef, dictContentType) ))
204 return NULL;
205 return ddict;
206 }
207
208
/* ZSTD_freeDDict() :
 * Release a DDict and its internal dictionary copy, if any,
 * using the allocator stored in the DDict.
 * Accepts NULL, in which case nothing happens.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    ZSTD_customMem cMem;

    if (ddict == NULL) return 0;   /* support free on NULL */

    /* copy the allocator out first : it lives inside the object being released */
    cMem = ddict->cMem;
    ZSTD_customFree(ddict->dictBuffer, cMem);
    ZSTD_customFree(ddict, cMem);
    return 0;
}
218
219 /*! ZSTD_estimateDDictSize() :
220 * Estimate amount of memory that will be needed to create a dictionary for decompression.
221 * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)222 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
223 {
224 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
225 }
226
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)227 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
228 {
229 if (ddict==NULL) return 0; /* support sizeof on NULL */
230 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
231 }
232
233 /*! ZSTD_getDictID_fromDDict() :
234 * Provides the dictID of the dictionary loaded into `ddict`.
235 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
236 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)237 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
238 {
239 if (ddict==NULL) return 0;
240 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
241 }
242