/* xref: /freebsd/sys/contrib/zstd/lib/decompress/zstd_ddict.c (revision 2b9c00cb6bd9392645dc8afca59cf57c42df4e2d) */
/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10a0483764SConrad Meyer 
/* zstd_ddict.c :
 * concentrates all the logic that needs to know the internals of the ZSTD_DDict object */
13a0483764SConrad Meyer 
/*-*******************************************************
*  Dependencies
*********************************************************/
17a0483764SConrad Meyer #include <string.h>      /* memcpy, memmove, memset */
18a0483764SConrad Meyer #include "cpu.h"         /* bmi2 */
19a0483764SConrad Meyer #include "mem.h"         /* low level memory routines */
20a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY
21a0483764SConrad Meyer #include "fse.h"
22a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY
23a0483764SConrad Meyer #include "huf.h"
24a0483764SConrad Meyer #include "zstd_decompress_internal.h"
25a0483764SConrad Meyer #include "zstd_ddict.h"
26a0483764SConrad Meyer 
27a0483764SConrad Meyer #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28a0483764SConrad Meyer #  include "zstd_legacy.h"
29a0483764SConrad Meyer #endif
30a0483764SConrad Meyer 
31a0483764SConrad Meyer 
32a0483764SConrad Meyer 
/*-*******************************************************
*  Types
*********************************************************/
36a0483764SConrad Meyer struct ZSTD_DDict_s {
37a0483764SConrad Meyer     void* dictBuffer;
38a0483764SConrad Meyer     const void* dictContent;
39a0483764SConrad Meyer     size_t dictSize;
40a0483764SConrad Meyer     ZSTD_entropyDTables_t entropy;
41a0483764SConrad Meyer     U32 dictID;
42a0483764SConrad Meyer     U32 entropyPresent;
43a0483764SConrad Meyer     ZSTD_customMem cMem;
44a0483764SConrad Meyer };  /* typedef'd to ZSTD_DDict within "zstd.h" */
45a0483764SConrad Meyer 
46a0483764SConrad Meyer const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47a0483764SConrad Meyer {
48a0483764SConrad Meyer     assert(ddict != NULL);
49a0483764SConrad Meyer     return ddict->dictContent;
50a0483764SConrad Meyer }
51a0483764SConrad Meyer 
52a0483764SConrad Meyer size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53a0483764SConrad Meyer {
54a0483764SConrad Meyer     assert(ddict != NULL);
55a0483764SConrad Meyer     return ddict->dictSize;
56a0483764SConrad Meyer }
57a0483764SConrad Meyer 
58a0483764SConrad Meyer void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
59a0483764SConrad Meyer {
60a0483764SConrad Meyer     DEBUGLOG(4, "ZSTD_copyDDictParameters");
61a0483764SConrad Meyer     assert(dctx != NULL);
62a0483764SConrad Meyer     assert(ddict != NULL);
63a0483764SConrad Meyer     dctx->dictID = ddict->dictID;
64a0483764SConrad Meyer     dctx->prefixStart = ddict->dictContent;
65a0483764SConrad Meyer     dctx->virtualStart = ddict->dictContent;
66a0483764SConrad Meyer     dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
67a0483764SConrad Meyer     dctx->previousDstEnd = dctx->dictEnd;
68a0483764SConrad Meyer     if (ddict->entropyPresent) {
69a0483764SConrad Meyer         dctx->litEntropy = 1;
70a0483764SConrad Meyer         dctx->fseEntropy = 1;
71a0483764SConrad Meyer         dctx->LLTptr = ddict->entropy.LLTable;
72a0483764SConrad Meyer         dctx->MLTptr = ddict->entropy.MLTable;
73a0483764SConrad Meyer         dctx->OFTptr = ddict->entropy.OFTable;
74a0483764SConrad Meyer         dctx->HUFptr = ddict->entropy.hufTable;
75a0483764SConrad Meyer         dctx->entropy.rep[0] = ddict->entropy.rep[0];
76a0483764SConrad Meyer         dctx->entropy.rep[1] = ddict->entropy.rep[1];
77a0483764SConrad Meyer         dctx->entropy.rep[2] = ddict->entropy.rep[2];
78a0483764SConrad Meyer     } else {
79a0483764SConrad Meyer         dctx->litEntropy = 0;
80a0483764SConrad Meyer         dctx->fseEntropy = 0;
81a0483764SConrad Meyer     }
82a0483764SConrad Meyer }
83a0483764SConrad Meyer 
84a0483764SConrad Meyer 
85a0483764SConrad Meyer static size_t
86a0483764SConrad Meyer ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
87a0483764SConrad Meyer                            ZSTD_dictContentType_e dictContentType)
88a0483764SConrad Meyer {
89a0483764SConrad Meyer     ddict->dictID = 0;
90a0483764SConrad Meyer     ddict->entropyPresent = 0;
91a0483764SConrad Meyer     if (dictContentType == ZSTD_dct_rawContent) return 0;
92a0483764SConrad Meyer 
93a0483764SConrad Meyer     if (ddict->dictSize < 8) {
94a0483764SConrad Meyer         if (dictContentType == ZSTD_dct_fullDict)
95a0483764SConrad Meyer             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
96a0483764SConrad Meyer         return 0;   /* pure content mode */
97a0483764SConrad Meyer     }
98a0483764SConrad Meyer     {   U32 const magic = MEM_readLE32(ddict->dictContent);
99a0483764SConrad Meyer         if (magic != ZSTD_MAGIC_DICTIONARY) {
100a0483764SConrad Meyer             if (dictContentType == ZSTD_dct_fullDict)
101a0483764SConrad Meyer                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
102a0483764SConrad Meyer             return 0;   /* pure content mode */
103a0483764SConrad Meyer         }
104a0483764SConrad Meyer     }
105a0483764SConrad Meyer     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
106a0483764SConrad Meyer 
107a0483764SConrad Meyer     /* load entropy tables */
108*2b9c00cbSConrad Meyer     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
109*2b9c00cbSConrad Meyer             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
110a0483764SConrad Meyer         dictionary_corrupted);
111a0483764SConrad Meyer     ddict->entropyPresent = 1;
112a0483764SConrad Meyer     return 0;
113a0483764SConrad Meyer }
114a0483764SConrad Meyer 
115a0483764SConrad Meyer 
116a0483764SConrad Meyer static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
117a0483764SConrad Meyer                                       const void* dict, size_t dictSize,
118a0483764SConrad Meyer                                       ZSTD_dictLoadMethod_e dictLoadMethod,
119a0483764SConrad Meyer                                       ZSTD_dictContentType_e dictContentType)
120a0483764SConrad Meyer {
121a0483764SConrad Meyer     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
122a0483764SConrad Meyer         ddict->dictBuffer = NULL;
123a0483764SConrad Meyer         ddict->dictContent = dict;
124a0483764SConrad Meyer         if (!dict) dictSize = 0;
125a0483764SConrad Meyer     } else {
126a0483764SConrad Meyer         void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
127a0483764SConrad Meyer         ddict->dictBuffer = internalBuffer;
128a0483764SConrad Meyer         ddict->dictContent = internalBuffer;
129a0483764SConrad Meyer         if (!internalBuffer) return ERROR(memory_allocation);
130a0483764SConrad Meyer         memcpy(internalBuffer, dict, dictSize);
131a0483764SConrad Meyer     }
132a0483764SConrad Meyer     ddict->dictSize = dictSize;
133a0483764SConrad Meyer     ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
134a0483764SConrad Meyer 
135a0483764SConrad Meyer     /* parse dictionary content */
136*2b9c00cbSConrad Meyer     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
137a0483764SConrad Meyer 
138a0483764SConrad Meyer     return 0;
139a0483764SConrad Meyer }
140a0483764SConrad Meyer 
141a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
142a0483764SConrad Meyer                                       ZSTD_dictLoadMethod_e dictLoadMethod,
143a0483764SConrad Meyer                                       ZSTD_dictContentType_e dictContentType,
144a0483764SConrad Meyer                                       ZSTD_customMem customMem)
145a0483764SConrad Meyer {
146a0483764SConrad Meyer     if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
147a0483764SConrad Meyer 
148a0483764SConrad Meyer     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
149a0483764SConrad Meyer         if (ddict == NULL) return NULL;
150a0483764SConrad Meyer         ddict->cMem = customMem;
151a0483764SConrad Meyer         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
152a0483764SConrad Meyer                                             dict, dictSize,
153a0483764SConrad Meyer                                             dictLoadMethod, dictContentType);
154a0483764SConrad Meyer             if (ZSTD_isError(initResult)) {
155a0483764SConrad Meyer                 ZSTD_freeDDict(ddict);
156a0483764SConrad Meyer                 return NULL;
157a0483764SConrad Meyer         }   }
158a0483764SConrad Meyer         return ddict;
159a0483764SConrad Meyer     }
160a0483764SConrad Meyer }
161a0483764SConrad Meyer 
162a0483764SConrad Meyer /*! ZSTD_createDDict() :
163a0483764SConrad Meyer *   Create a digested dictionary, to start decompression without startup delay.
164a0483764SConrad Meyer *   `dict` content is copied inside DDict.
165a0483764SConrad Meyer *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
166a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
167a0483764SConrad Meyer {
168a0483764SConrad Meyer     ZSTD_customMem const allocator = { NULL, NULL, NULL };
169a0483764SConrad Meyer     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
170a0483764SConrad Meyer }
171a0483764SConrad Meyer 
172a0483764SConrad Meyer /*! ZSTD_createDDict_byReference() :
173a0483764SConrad Meyer  *  Create a digested dictionary, to start decompression without startup delay.
174a0483764SConrad Meyer  *  Dictionary content is simply referenced, it will be accessed during decompression.
175a0483764SConrad Meyer  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
176a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
177a0483764SConrad Meyer {
178a0483764SConrad Meyer     ZSTD_customMem const allocator = { NULL, NULL, NULL };
179a0483764SConrad Meyer     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
180a0483764SConrad Meyer }
181a0483764SConrad Meyer 
182a0483764SConrad Meyer 
183a0483764SConrad Meyer const ZSTD_DDict* ZSTD_initStaticDDict(
184a0483764SConrad Meyer                                 void* sBuffer, size_t sBufferSize,
185a0483764SConrad Meyer                                 const void* dict, size_t dictSize,
186a0483764SConrad Meyer                                 ZSTD_dictLoadMethod_e dictLoadMethod,
187a0483764SConrad Meyer                                 ZSTD_dictContentType_e dictContentType)
188a0483764SConrad Meyer {
189a0483764SConrad Meyer     size_t const neededSpace = sizeof(ZSTD_DDict)
190a0483764SConrad Meyer                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
191a0483764SConrad Meyer     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
192a0483764SConrad Meyer     assert(sBuffer != NULL);
193a0483764SConrad Meyer     assert(dict != NULL);
194a0483764SConrad Meyer     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
195a0483764SConrad Meyer     if (sBufferSize < neededSpace) return NULL;
196a0483764SConrad Meyer     if (dictLoadMethod == ZSTD_dlm_byCopy) {
197a0483764SConrad Meyer         memcpy(ddict+1, dict, dictSize);  /* local copy */
198a0483764SConrad Meyer         dict = ddict+1;
199a0483764SConrad Meyer     }
200a0483764SConrad Meyer     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
201a0483764SConrad Meyer                                               dict, dictSize,
202a0483764SConrad Meyer                                               ZSTD_dlm_byRef, dictContentType) ))
203a0483764SConrad Meyer         return NULL;
204a0483764SConrad Meyer     return ddict;
205a0483764SConrad Meyer }
206a0483764SConrad Meyer 
207a0483764SConrad Meyer 
208a0483764SConrad Meyer size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
209a0483764SConrad Meyer {
210a0483764SConrad Meyer     if (ddict==NULL) return 0;   /* support free on NULL */
211a0483764SConrad Meyer     {   ZSTD_customMem const cMem = ddict->cMem;
212a0483764SConrad Meyer         ZSTD_free(ddict->dictBuffer, cMem);
213a0483764SConrad Meyer         ZSTD_free(ddict, cMem);
214a0483764SConrad Meyer         return 0;
215a0483764SConrad Meyer     }
216a0483764SConrad Meyer }
217a0483764SConrad Meyer 
218a0483764SConrad Meyer /*! ZSTD_estimateDDictSize() :
219a0483764SConrad Meyer  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
220a0483764SConrad Meyer  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
221a0483764SConrad Meyer size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
222a0483764SConrad Meyer {
223a0483764SConrad Meyer     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
224a0483764SConrad Meyer }
225a0483764SConrad Meyer 
226a0483764SConrad Meyer size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
227a0483764SConrad Meyer {
228a0483764SConrad Meyer     if (ddict==NULL) return 0;   /* support sizeof on NULL */
229a0483764SConrad Meyer     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
230a0483764SConrad Meyer }
231a0483764SConrad Meyer 
232a0483764SConrad Meyer /*! ZSTD_getDictID_fromDDict() :
233a0483764SConrad Meyer  *  Provides the dictID of the dictionary loaded into `ddict`.
234a0483764SConrad Meyer  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
235a0483764SConrad Meyer  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
236a0483764SConrad Meyer unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
237a0483764SConrad Meyer {
238a0483764SConrad Meyer     if (ddict==NULL) return 0;
239a0483764SConrad Meyer     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
240a0483764SConrad Meyer }
241