1a0483764SConrad Meyer /* 2*37f1f268SConrad Meyer * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3a0483764SConrad Meyer * All rights reserved. 4a0483764SConrad Meyer * 5a0483764SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 6a0483764SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7a0483764SConrad Meyer * in the COPYING file in the root directory of this source tree). 8a0483764SConrad Meyer * You may select, at your option, one of the above-listed licenses. 9a0483764SConrad Meyer */ 10a0483764SConrad Meyer 11a0483764SConrad Meyer /* zstd_ddict.c : 12a0483764SConrad Meyer * concentrates all logic that needs to know the internals of ZSTD_DDict object */ 13a0483764SConrad Meyer 14a0483764SConrad Meyer /*-******************************************************* 15a0483764SConrad Meyer * Dependencies 16a0483764SConrad Meyer *********************************************************/ 17a0483764SConrad Meyer #include <string.h> /* memcpy, memmove, memset */ 18*37f1f268SConrad Meyer #include "../common/cpu.h" /* bmi2 */ 19*37f1f268SConrad Meyer #include "../common/mem.h" /* low level memory routines */ 20a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY 21*37f1f268SConrad Meyer #include "../common/fse.h" 22a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY 23*37f1f268SConrad Meyer #include "../common/huf.h" 24a0483764SConrad Meyer #include "zstd_decompress_internal.h" 25a0483764SConrad Meyer #include "zstd_ddict.h" 26a0483764SConrad Meyer 27a0483764SConrad Meyer #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) 28*37f1f268SConrad Meyer # include "../legacy/zstd_legacy.h" 29a0483764SConrad Meyer #endif 30a0483764SConrad Meyer 31a0483764SConrad Meyer 32a0483764SConrad Meyer 33a0483764SConrad Meyer /*-******************************************************* 34a0483764SConrad Meyer * Types 35a0483764SConrad Meyer *********************************************************/ 36a0483764SConrad Meyer struct ZSTD_DDict_s { 37a0483764SConrad Meyer void* dictBuffer; 38a0483764SConrad Meyer const void* dictContent; 39a0483764SConrad Meyer size_t dictSize; 40a0483764SConrad Meyer ZSTD_entropyDTables_t entropy; 41a0483764SConrad Meyer U32 dictID; 42a0483764SConrad Meyer U32 entropyPresent; 43a0483764SConrad Meyer ZSTD_customMem cMem; 44a0483764SConrad Meyer }; /* typedef'd to ZSTD_DDict within "zstd.h" */ 45a0483764SConrad Meyer 46a0483764SConrad Meyer const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) 47a0483764SConrad Meyer { 48a0483764SConrad Meyer assert(ddict != NULL); 49a0483764SConrad Meyer return ddict->dictContent; 50a0483764SConrad Meyer } 51a0483764SConrad Meyer 52a0483764SConrad Meyer size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) 53a0483764SConrad Meyer { 54a0483764SConrad Meyer assert(ddict != NULL); 55a0483764SConrad Meyer return ddict->dictSize; 56a0483764SConrad Meyer } 57a0483764SConrad Meyer 58a0483764SConrad Meyer void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) 59a0483764SConrad Meyer { 60a0483764SConrad Meyer DEBUGLOG(4, "ZSTD_copyDDictParameters"); 61a0483764SConrad Meyer assert(dctx != NULL); 62a0483764SConrad Meyer assert(ddict != NULL); 63a0483764SConrad Meyer dctx->dictID = ddict->dictID; 64a0483764SConrad Meyer dctx->prefixStart = ddict->dictContent; 65a0483764SConrad Meyer dctx->virtualStart = ddict->dictContent; 66a0483764SConrad Meyer dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; 67a0483764SConrad Meyer dctx->previousDstEnd = dctx->dictEnd; 68*37f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 69*37f1f268SConrad Meyer dctx->dictContentBeginForFuzzing = dctx->prefixStart; 70*37f1f268SConrad Meyer dctx->dictContentEndForFuzzing = dctx->previousDstEnd; 71*37f1f268SConrad Meyer #endif 72a0483764SConrad Meyer if (ddict->entropyPresent) { 73a0483764SConrad Meyer dctx->litEntropy = 1; 74a0483764SConrad Meyer dctx->fseEntropy = 1; 75a0483764SConrad Meyer dctx->LLTptr = ddict->entropy.LLTable; 76a0483764SConrad Meyer dctx->MLTptr = ddict->entropy.MLTable; 77a0483764SConrad Meyer dctx->OFTptr = ddict->entropy.OFTable; 78a0483764SConrad Meyer dctx->HUFptr = ddict->entropy.hufTable; 79a0483764SConrad Meyer dctx->entropy.rep[0] = ddict->entropy.rep[0]; 80a0483764SConrad Meyer dctx->entropy.rep[1] = ddict->entropy.rep[1]; 81a0483764SConrad Meyer dctx->entropy.rep[2] = ddict->entropy.rep[2]; 82a0483764SConrad Meyer } else { 83a0483764SConrad Meyer dctx->litEntropy = 0; 84a0483764SConrad Meyer dctx->fseEntropy = 0; 85a0483764SConrad Meyer } 86a0483764SConrad Meyer } 87a0483764SConrad Meyer 88a0483764SConrad Meyer 89a0483764SConrad Meyer static size_t 90a0483764SConrad Meyer ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, 91a0483764SConrad Meyer ZSTD_dictContentType_e dictContentType) 92a0483764SConrad Meyer { 93a0483764SConrad Meyer ddict->dictID = 0; 94a0483764SConrad Meyer ddict->entropyPresent = 0; 95a0483764SConrad Meyer if (dictContentType == ZSTD_dct_rawContent) return 0; 96a0483764SConrad Meyer 97a0483764SConrad Meyer if (ddict->dictSize < 8) { 98a0483764SConrad Meyer if (dictContentType == ZSTD_dct_fullDict) 99a0483764SConrad Meyer return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 100a0483764SConrad Meyer return 0; /* pure content mode */ 101a0483764SConrad Meyer } 102a0483764SConrad Meyer { U32 const magic = MEM_readLE32(ddict->dictContent); 103a0483764SConrad Meyer if (magic != ZSTD_MAGIC_DICTIONARY) { 104a0483764SConrad Meyer if (dictContentType == ZSTD_dct_fullDict) 105a0483764SConrad Meyer return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 106a0483764SConrad Meyer return 0; /* pure content mode */ 107a0483764SConrad Meyer } 108a0483764SConrad Meyer } 109a0483764SConrad Meyer ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); 110a0483764SConrad Meyer 111a0483764SConrad Meyer /* load entropy tables */ 1122b9c00cbSConrad Meyer RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( 1132b9c00cbSConrad Meyer &ddict->entropy, ddict->dictContent, ddict->dictSize)), 114*37f1f268SConrad Meyer dictionary_corrupted, ""); 115a0483764SConrad Meyer ddict->entropyPresent = 1; 116a0483764SConrad Meyer return 0; 117a0483764SConrad Meyer } 118a0483764SConrad Meyer 119a0483764SConrad Meyer 120a0483764SConrad Meyer static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, 121a0483764SConrad Meyer const void* dict, size_t dictSize, 122a0483764SConrad Meyer ZSTD_dictLoadMethod_e dictLoadMethod, 123a0483764SConrad Meyer ZSTD_dictContentType_e dictContentType) 124a0483764SConrad Meyer { 125a0483764SConrad Meyer if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { 126a0483764SConrad Meyer ddict->dictBuffer = NULL; 127a0483764SConrad Meyer ddict->dictContent = dict; 128a0483764SConrad Meyer if (!dict) dictSize = 0; 129a0483764SConrad Meyer } else { 130a0483764SConrad Meyer void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); 131a0483764SConrad Meyer ddict->dictBuffer = internalBuffer; 132a0483764SConrad Meyer ddict->dictContent = internalBuffer; 133a0483764SConrad Meyer if (!internalBuffer) return ERROR(memory_allocation); 134a0483764SConrad Meyer memcpy(internalBuffer, dict, dictSize); 135a0483764SConrad Meyer } 136a0483764SConrad Meyer ddict->dictSize = dictSize; 137a0483764SConrad Meyer ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ 138a0483764SConrad Meyer 139a0483764SConrad Meyer /* parse dictionary content */ 140*37f1f268SConrad Meyer FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); 141a0483764SConrad Meyer 142a0483764SConrad Meyer return 0; 143a0483764SConrad Meyer } 144a0483764SConrad Meyer 145a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, 146a0483764SConrad Meyer ZSTD_dictLoadMethod_e dictLoadMethod, 147a0483764SConrad Meyer ZSTD_dictContentType_e dictContentType, 148a0483764SConrad Meyer ZSTD_customMem customMem) 149a0483764SConrad Meyer { 150a0483764SConrad Meyer if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 151a0483764SConrad Meyer 152a0483764SConrad Meyer { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); 153a0483764SConrad Meyer if (ddict == NULL) return NULL; 154a0483764SConrad Meyer ddict->cMem = customMem; 155a0483764SConrad Meyer { size_t const initResult = ZSTD_initDDict_internal(ddict, 156a0483764SConrad Meyer dict, dictSize, 157a0483764SConrad Meyer dictLoadMethod, dictContentType); 158a0483764SConrad Meyer if (ZSTD_isError(initResult)) { 159a0483764SConrad Meyer ZSTD_freeDDict(ddict); 160a0483764SConrad Meyer return NULL; 161a0483764SConrad Meyer } } 162a0483764SConrad Meyer return ddict; 163a0483764SConrad Meyer } 164a0483764SConrad Meyer } 165a0483764SConrad Meyer 166a0483764SConrad Meyer /*! ZSTD_createDDict() : 167a0483764SConrad Meyer * Create a digested dictionary, to start decompression without startup delay. 168a0483764SConrad Meyer * `dict` content is copied inside DDict. 169a0483764SConrad Meyer * Consequently, `dict` can be released after `ZSTD_DDict` creation */ 170a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) 171a0483764SConrad Meyer { 172a0483764SConrad Meyer ZSTD_customMem const allocator = { NULL, NULL, NULL }; 173a0483764SConrad Meyer return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); 174a0483764SConrad Meyer } 175a0483764SConrad Meyer 176a0483764SConrad Meyer /*! ZSTD_createDDict_byReference() : 177a0483764SConrad Meyer * Create a digested dictionary, to start decompression without startup delay. 178a0483764SConrad Meyer * Dictionary content is simply referenced, it will be accessed during decompression. 179a0483764SConrad Meyer * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ 180a0483764SConrad Meyer ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) 181a0483764SConrad Meyer { 182a0483764SConrad Meyer ZSTD_customMem const allocator = { NULL, NULL, NULL }; 183a0483764SConrad Meyer return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); 184a0483764SConrad Meyer } 185a0483764SConrad Meyer 186a0483764SConrad Meyer 187a0483764SConrad Meyer const ZSTD_DDict* ZSTD_initStaticDDict( 188a0483764SConrad Meyer void* sBuffer, size_t sBufferSize, 189a0483764SConrad Meyer const void* dict, size_t dictSize, 190a0483764SConrad Meyer ZSTD_dictLoadMethod_e dictLoadMethod, 191a0483764SConrad Meyer ZSTD_dictContentType_e dictContentType) 192a0483764SConrad Meyer { 193a0483764SConrad Meyer size_t const neededSpace = sizeof(ZSTD_DDict) 194a0483764SConrad Meyer + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 195a0483764SConrad Meyer ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; 196a0483764SConrad Meyer assert(sBuffer != NULL); 197a0483764SConrad Meyer assert(dict != NULL); 198a0483764SConrad Meyer if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ 199a0483764SConrad Meyer if (sBufferSize < neededSpace) return NULL; 200a0483764SConrad Meyer if (dictLoadMethod == ZSTD_dlm_byCopy) { 201a0483764SConrad Meyer memcpy(ddict+1, dict, dictSize); /* local copy */ 202a0483764SConrad Meyer dict = ddict+1; 203a0483764SConrad Meyer } 204a0483764SConrad Meyer if (ZSTD_isError( ZSTD_initDDict_internal(ddict, 205a0483764SConrad Meyer dict, dictSize, 206a0483764SConrad Meyer ZSTD_dlm_byRef, dictContentType) )) 207a0483764SConrad Meyer return NULL; 208a0483764SConrad Meyer return ddict; 209a0483764SConrad Meyer } 210a0483764SConrad Meyer 211a0483764SConrad Meyer 212a0483764SConrad Meyer size_t ZSTD_freeDDict(ZSTD_DDict* ddict) 213a0483764SConrad Meyer { 214a0483764SConrad Meyer if (ddict==NULL) return 0; /* support free on NULL */ 215a0483764SConrad Meyer { ZSTD_customMem const cMem = ddict->cMem; 216a0483764SConrad Meyer ZSTD_free(ddict->dictBuffer, cMem); 217a0483764SConrad Meyer ZSTD_free(ddict, cMem); 218a0483764SConrad Meyer return 0; 219a0483764SConrad Meyer } 220a0483764SConrad Meyer } 221a0483764SConrad Meyer 222a0483764SConrad Meyer /*! ZSTD_estimateDDictSize() : 223a0483764SConrad Meyer * Estimate amount of memory that will be needed to create a dictionary for decompression. 224a0483764SConrad Meyer * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ 225a0483764SConrad Meyer size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) 226a0483764SConrad Meyer { 227a0483764SConrad Meyer return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 228a0483764SConrad Meyer } 229a0483764SConrad Meyer 230a0483764SConrad Meyer size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) 231a0483764SConrad Meyer { 232a0483764SConrad Meyer if (ddict==NULL) return 0; /* support sizeof on NULL */ 233a0483764SConrad Meyer return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; 234a0483764SConrad Meyer } 235a0483764SConrad Meyer 236a0483764SConrad Meyer /*! ZSTD_getDictID_fromDDict() : 237a0483764SConrad Meyer * Provides the dictID of the dictionary loaded into `ddict`. 238a0483764SConrad Meyer * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 239a0483764SConrad Meyer * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ 240a0483764SConrad Meyer unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) 241a0483764SConrad Meyer { 242a0483764SConrad Meyer if (ddict==NULL) return 0; 243a0483764SConrad Meyer return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); 244a0483764SConrad Meyer } 245