xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/decompress/zstd_ddict.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1*61145dc2SMartin Matuska // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2c03c5b1cSMartin Matuska /*
3c03c5b1cSMartin Matuska  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
4c03c5b1cSMartin Matuska  * All rights reserved.
5c03c5b1cSMartin Matuska  *
6c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
7c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
9c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
10c03c5b1cSMartin Matuska  */
11c03c5b1cSMartin Matuska 
12c03c5b1cSMartin Matuska /* zstd_ddict.c :
13c03c5b1cSMartin Matuska  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
14c03c5b1cSMartin Matuska 
15c03c5b1cSMartin Matuska /*-*******************************************************
16c03c5b1cSMartin Matuska *  Dependencies
17c03c5b1cSMartin Matuska *********************************************************/
18c03c5b1cSMartin Matuska #include <string.h>      /* memcpy, memmove, memset */
19c03c5b1cSMartin Matuska #include "../common/cpu.h"         /* bmi2 */
20c03c5b1cSMartin Matuska #include "../common/mem.h"         /* low level memory routines */
21c03c5b1cSMartin Matuska #define FSE_STATIC_LINKING_ONLY
22c03c5b1cSMartin Matuska #include "../common/fse.h"
23c03c5b1cSMartin Matuska #define HUF_STATIC_LINKING_ONLY
24c03c5b1cSMartin Matuska #include "../common/huf.h"
25c03c5b1cSMartin Matuska #include "zstd_decompress_internal.h"
26c03c5b1cSMartin Matuska #include "zstd_ddict.h"
27c03c5b1cSMartin Matuska 
28c03c5b1cSMartin Matuska #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
29c03c5b1cSMartin Matuska #  include "../legacy/zstd_legacy.h"
30c03c5b1cSMartin Matuska #endif
31c03c5b1cSMartin Matuska 
32c03c5b1cSMartin Matuska 
33c03c5b1cSMartin Matuska 
34c03c5b1cSMartin Matuska /*-*******************************************************
35c03c5b1cSMartin Matuska *  Types
36c03c5b1cSMartin Matuska *********************************************************/
37c03c5b1cSMartin Matuska struct ZSTD_DDict_s {
38c03c5b1cSMartin Matuska     void* dictBuffer;
39c03c5b1cSMartin Matuska     const void* dictContent;
40c03c5b1cSMartin Matuska     size_t dictSize;
41c03c5b1cSMartin Matuska     ZSTD_entropyDTables_t entropy;
42c03c5b1cSMartin Matuska     U32 dictID;
43c03c5b1cSMartin Matuska     U32 entropyPresent;
44c03c5b1cSMartin Matuska     ZSTD_customMem cMem;
45c03c5b1cSMartin Matuska };  /* typedef'd to ZSTD_DDict within "zstd.h" */
46c03c5b1cSMartin Matuska 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)47c03c5b1cSMartin Matuska const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
48c03c5b1cSMartin Matuska {
49c03c5b1cSMartin Matuska     assert(ddict != NULL);
50c03c5b1cSMartin Matuska     return ddict->dictContent;
51c03c5b1cSMartin Matuska }
52c03c5b1cSMartin Matuska 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)53c03c5b1cSMartin Matuska size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
54c03c5b1cSMartin Matuska {
55c03c5b1cSMartin Matuska     assert(ddict != NULL);
56c03c5b1cSMartin Matuska     return ddict->dictSize;
57c03c5b1cSMartin Matuska }
58c03c5b1cSMartin Matuska 
/*! ZSTD_copyDDictParameters() :
 *  Points `dctx` at the dictionary held by `ddict` :
 *  dictID, the segment boundaries (prefixStart, virtualStart, dictEnd,
 *  previousDstEnd) and, when the dictionary carries entropy tables,
 *  the table pointers and the 3 repeat offsets.
 *  Both pointers must be non-NULL (only verified through assert()).
 *  Note : table pointers reference memory inside `ddict`, they are not copied. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* the dictionary content becomes the segment preceding the next output */
    {   const void* const dictStart = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = (const BYTE*)dictStart + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
    }
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    if (!ddict->entropyPresent) {
        /* content-only dictionary : no entropy tables to reference */
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
    }   }
}
88c03c5b1cSMartin Matuska 
89c03c5b1cSMartin Matuska 
90c03c5b1cSMartin Matuska static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)91c03c5b1cSMartin Matuska ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
92c03c5b1cSMartin Matuska                            ZSTD_dictContentType_e dictContentType)
93c03c5b1cSMartin Matuska {
94c03c5b1cSMartin Matuska     ddict->dictID = 0;
95c03c5b1cSMartin Matuska     ddict->entropyPresent = 0;
96c03c5b1cSMartin Matuska     if (dictContentType == ZSTD_dct_rawContent) return 0;
97c03c5b1cSMartin Matuska 
98c03c5b1cSMartin Matuska     if (ddict->dictSize < 8) {
99c03c5b1cSMartin Matuska         if (dictContentType == ZSTD_dct_fullDict)
100c03c5b1cSMartin Matuska             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
101c03c5b1cSMartin Matuska         return 0;   /* pure content mode */
102c03c5b1cSMartin Matuska     }
103c03c5b1cSMartin Matuska     {   U32 const magic = MEM_readLE32(ddict->dictContent);
104c03c5b1cSMartin Matuska         if (magic != ZSTD_MAGIC_DICTIONARY) {
105c03c5b1cSMartin Matuska             if (dictContentType == ZSTD_dct_fullDict)
106c03c5b1cSMartin Matuska                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
107c03c5b1cSMartin Matuska             return 0;   /* pure content mode */
108c03c5b1cSMartin Matuska         }
109c03c5b1cSMartin Matuska     }
110c03c5b1cSMartin Matuska     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
111c03c5b1cSMartin Matuska 
112c03c5b1cSMartin Matuska     /* load entropy tables */
113c03c5b1cSMartin Matuska     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
114c03c5b1cSMartin Matuska             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
115c03c5b1cSMartin Matuska         dictionary_corrupted, "");
116c03c5b1cSMartin Matuska     ddict->entropyPresent = 1;
117c03c5b1cSMartin Matuska     return 0;
118c03c5b1cSMartin Matuska }
119c03c5b1cSMartin Matuska 
120c03c5b1cSMartin Matuska 
/*! ZSTD_initDDict_internal() :
 *  Attaches `dict` to `ddict`, then parses it.
 *  The content is duplicated (using ddict->cMem) only when loading by copy
 *  a non-NULL, non-empty dictionary; otherwise it is referenced as-is and
 *  dictBuffer is set to NULL. A NULL `dict` results in a size of 0.
 *  ddict->cMem must be set by the caller before a by-copy load.
 *  @return : 0 on success, or an error code (memory_allocation, or the one
 *            reported by ZSTD_loadEntropy_intoDDict()). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dict != NULL)
                      && (dictSize != 0);

    if (mustCopy) {
        void* const dictCopy = ZSTD_malloc(dictSize, ddict->cMem);
        /* pointers are stored before the failure check,
         * so dictBuffer is always assigned when this function returns */
        ddict->dictBuffer = dictCopy;
        ddict->dictContent = dictCopy;
        if (dictCopy == NULL) return ERROR(memory_allocation);
        memcpy(dictCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
145c03c5b1cSMartin Matuska 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)146c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
147c03c5b1cSMartin Matuska                                       ZSTD_dictLoadMethod_e dictLoadMethod,
148c03c5b1cSMartin Matuska                                       ZSTD_dictContentType_e dictContentType,
149c03c5b1cSMartin Matuska                                       ZSTD_customMem customMem)
150c03c5b1cSMartin Matuska {
151c03c5b1cSMartin Matuska     if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
152c03c5b1cSMartin Matuska 
153c03c5b1cSMartin Matuska     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
154c03c5b1cSMartin Matuska         if (ddict == NULL) return NULL;
155c03c5b1cSMartin Matuska         ddict->cMem = customMem;
156c03c5b1cSMartin Matuska         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
157c03c5b1cSMartin Matuska                                             dict, dictSize,
158c03c5b1cSMartin Matuska                                             dictLoadMethod, dictContentType);
159c03c5b1cSMartin Matuska             if (ZSTD_isError(initResult)) {
160c03c5b1cSMartin Matuska                 ZSTD_freeDDict(ddict);
161c03c5b1cSMartin Matuska                 return NULL;
162c03c5b1cSMartin Matuska         }   }
163c03c5b1cSMartin Matuska         return ddict;
164c03c5b1cSMartin Matuska     }
165c03c5b1cSMartin Matuska }
166c03c5b1cSMartin Matuska 
167c03c5b1cSMartin Matuska /*! ZSTD_createDDict() :
168c03c5b1cSMartin Matuska *   Create a digested dictionary, to start decompression without startup delay.
169c03c5b1cSMartin Matuska *   `dict` content is copied inside DDict.
170c03c5b1cSMartin Matuska *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)171c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
172c03c5b1cSMartin Matuska {
173c03c5b1cSMartin Matuska     ZSTD_customMem const allocator = { NULL, NULL, NULL };
174c03c5b1cSMartin Matuska     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
175c03c5b1cSMartin Matuska }
176c03c5b1cSMartin Matuska 
177c03c5b1cSMartin Matuska /*! ZSTD_createDDict_byReference() :
178c03c5b1cSMartin Matuska  *  Create a digested dictionary, to start decompression without startup delay.
179c03c5b1cSMartin Matuska  *  Dictionary content is simply referenced, it will be accessed during decompression.
180c03c5b1cSMartin Matuska  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)181c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
182c03c5b1cSMartin Matuska {
183c03c5b1cSMartin Matuska     ZSTD_customMem const allocator = { NULL, NULL, NULL };
184c03c5b1cSMartin Matuska     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
185c03c5b1cSMartin Matuska }
186c03c5b1cSMartin Matuska 
187c03c5b1cSMartin Matuska 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)188c03c5b1cSMartin Matuska const ZSTD_DDict* ZSTD_initStaticDDict(
189c03c5b1cSMartin Matuska                                 void* sBuffer, size_t sBufferSize,
190c03c5b1cSMartin Matuska                                 const void* dict, size_t dictSize,
191c03c5b1cSMartin Matuska                                 ZSTD_dictLoadMethod_e dictLoadMethod,
192c03c5b1cSMartin Matuska                                 ZSTD_dictContentType_e dictContentType)
193c03c5b1cSMartin Matuska {
194c03c5b1cSMartin Matuska     size_t const neededSpace = sizeof(ZSTD_DDict)
195c03c5b1cSMartin Matuska                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
196c03c5b1cSMartin Matuska     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
197c03c5b1cSMartin Matuska     assert(sBuffer != NULL);
198c03c5b1cSMartin Matuska     assert(dict != NULL);
199c03c5b1cSMartin Matuska     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
200c03c5b1cSMartin Matuska     if (sBufferSize < neededSpace) return NULL;
201c03c5b1cSMartin Matuska     if (dictLoadMethod == ZSTD_dlm_byCopy) {
202c03c5b1cSMartin Matuska         memcpy(ddict+1, dict, dictSize);  /* local copy */
203c03c5b1cSMartin Matuska         dict = ddict+1;
204c03c5b1cSMartin Matuska     }
205c03c5b1cSMartin Matuska     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
206c03c5b1cSMartin Matuska                                               dict, dictSize,
207c03c5b1cSMartin Matuska                                               ZSTD_dlm_byRef, dictContentType) ))
208c03c5b1cSMartin Matuska         return NULL;
209c03c5b1cSMartin Matuska     return ddict;
210c03c5b1cSMartin Matuska }
211c03c5b1cSMartin Matuska 
212c03c5b1cSMartin Matuska 
/*! ZSTD_freeDDict() :
 *  Releases the internal dictionary buffer, then the DDict itself,
 *  both through the allocator stored in the DDict.
 *  Accepts NULL, in which case nothing is done.
 *  @return : always 0 */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* cMem is copied into a local before anything gets released */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_free(ddict->dictBuffer, allocator);
        ZSTD_free(ddict, allocator);
    }
    return 0;
}
222c03c5b1cSMartin Matuska 
223c03c5b1cSMartin Matuska /*! ZSTD_estimateDDictSize() :
224c03c5b1cSMartin Matuska  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
225c03c5b1cSMartin Matuska  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)226c03c5b1cSMartin Matuska size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
227c03c5b1cSMartin Matuska {
228c03c5b1cSMartin Matuska     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
229c03c5b1cSMartin Matuska }
230c03c5b1cSMartin Matuska 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)231c03c5b1cSMartin Matuska size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
232c03c5b1cSMartin Matuska {
233c03c5b1cSMartin Matuska     if (ddict==NULL) return 0;   /* support sizeof on NULL */
234c03c5b1cSMartin Matuska     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
235c03c5b1cSMartin Matuska }
236c03c5b1cSMartin Matuska 
237c03c5b1cSMartin Matuska /*! ZSTD_getDictID_fromDDict() :
238c03c5b1cSMartin Matuska  *  Provides the dictID of the dictionary loaded into `ddict`.
239c03c5b1cSMartin Matuska  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
240c03c5b1cSMartin Matuska  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)241c03c5b1cSMartin Matuska unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
242c03c5b1cSMartin Matuska {
243c03c5b1cSMartin Matuska     if (ddict==NULL) return 0;
244c03c5b1cSMartin Matuska     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
245c03c5b1cSMartin Matuska }
246