1*61145dc2SMartin Matuska // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only 2c03c5b1cSMartin Matuska /* 3c03c5b1cSMartin Matuska * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 4c03c5b1cSMartin Matuska * All rights reserved. 5c03c5b1cSMartin Matuska * 6c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the 7c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree). 9c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses. 10c03c5b1cSMartin Matuska */ 11c03c5b1cSMartin Matuska 12c03c5b1cSMartin Matuska #ifndef ZSTD_LDM_H 13c03c5b1cSMartin Matuska #define ZSTD_LDM_H 14c03c5b1cSMartin Matuska 15c03c5b1cSMartin Matuska #if defined (__cplusplus) 16c03c5b1cSMartin Matuska extern "C" { 17c03c5b1cSMartin Matuska #endif 18c03c5b1cSMartin Matuska 19c03c5b1cSMartin Matuska #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ 20c03c5b1cSMartin Matuska #include "../zstd.h" /* ZSTD_CCtx, size_t */ 21c03c5b1cSMartin Matuska 22c03c5b1cSMartin Matuska /*-************************************* 23c03c5b1cSMartin Matuska * Long distance matching 24c03c5b1cSMartin Matuska ***************************************/ 25c03c5b1cSMartin Matuska 26c03c5b1cSMartin Matuska #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT 27c03c5b1cSMartin Matuska 28c03c5b1cSMartin Matuska void ZSTD_ldm_fillHashTable( 29c03c5b1cSMartin Matuska ldmState_t* state, const BYTE* ip, 30c03c5b1cSMartin Matuska const BYTE* iend, ldmParams_t const* params); 31c03c5b1cSMartin Matuska 32c03c5b1cSMartin Matuska /** 33c03c5b1cSMartin Matuska * ZSTD_ldm_generateSequences(): 34c03c5b1cSMartin Matuska * 35c03c5b1cSMartin Matuska * Generates the sequences using the long distance match finder. 36c03c5b1cSMartin Matuska * Generates long range matching sequences in `sequences`, which parse a prefix 37c03c5b1cSMartin Matuska * of the source. `sequences` must be large enough to store every sequence, 38c03c5b1cSMartin Matuska * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. 39c03c5b1cSMartin Matuska * @returns 0 or an error code. 40c03c5b1cSMartin Matuska * 41c03c5b1cSMartin Matuska * NOTE: The user must have called ZSTD_window_update() for all of the input 42c03c5b1cSMartin Matuska * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. 43c03c5b1cSMartin Matuska * NOTE: This function returns an error if it runs out of space to store 44c03c5b1cSMartin Matuska * sequences. 45c03c5b1cSMartin Matuska */ 46c03c5b1cSMartin Matuska size_t ZSTD_ldm_generateSequences( 47c03c5b1cSMartin Matuska ldmState_t* ldms, rawSeqStore_t* sequences, 48c03c5b1cSMartin Matuska ldmParams_t const* params, void const* src, size_t srcSize); 49c03c5b1cSMartin Matuska 50c03c5b1cSMartin Matuska /** 51c03c5b1cSMartin Matuska * ZSTD_ldm_blockCompress(): 52c03c5b1cSMartin Matuska * 53c03c5b1cSMartin Matuska * Compresses a block using the predefined sequences, along with a secondary 54c03c5b1cSMartin Matuska * block compressor. The literals section of every sequence is passed to the 55c03c5b1cSMartin Matuska * secondary block compressor, and those sequences are interspersed with the 56c03c5b1cSMartin Matuska * predefined sequences. Returns the length of the last literals. 57c03c5b1cSMartin Matuska * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. 58c03c5b1cSMartin Matuska * `rawSeqStore.seq` may also be updated to split the last sequence between two 59c03c5b1cSMartin Matuska * blocks. 60c03c5b1cSMartin Matuska * @return The length of the last literals. 61c03c5b1cSMartin Matuska * 62c03c5b1cSMartin Matuska * NOTE: The source must be at most the maximum block size, but the predefined 63c03c5b1cSMartin Matuska * sequences can be any size, and may be longer than the block. In the case that 64c03c5b1cSMartin Matuska * they are longer than the block, the last sequences may need to be split into 65c03c5b1cSMartin Matuska * two. We handle that case correctly, and update `rawSeqStore` appropriately. 66c03c5b1cSMartin Matuska * NOTE: This function does not return any errors. 67c03c5b1cSMartin Matuska */ 68c03c5b1cSMartin Matuska size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 69c03c5b1cSMartin Matuska ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 70c03c5b1cSMartin Matuska void const* src, size_t srcSize); 71c03c5b1cSMartin Matuska 72c03c5b1cSMartin Matuska /** 73c03c5b1cSMartin Matuska * ZSTD_ldm_skipSequences(): 74c03c5b1cSMartin Matuska * 75c03c5b1cSMartin Matuska * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. 76c03c5b1cSMartin Matuska * Avoids emitting matches less than `minMatch` bytes. 77c03c5b1cSMartin Matuska * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). 78c03c5b1cSMartin Matuska */ 79c03c5b1cSMartin Matuska void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, 80c03c5b1cSMartin Matuska U32 const minMatch); 81c03c5b1cSMartin Matuska 82c03c5b1cSMartin Matuska 83c03c5b1cSMartin Matuska /** ZSTD_ldm_getTableSize() : 84c03c5b1cSMartin Matuska * Estimate the space needed for long distance matching tables or 0 if LDM is 85c03c5b1cSMartin Matuska * disabled. 86c03c5b1cSMartin Matuska */ 87c03c5b1cSMartin Matuska size_t ZSTD_ldm_getTableSize(ldmParams_t params); 88c03c5b1cSMartin Matuska 89c03c5b1cSMartin Matuska /** ZSTD_ldm_getSeqSpace() : 90c03c5b1cSMartin Matuska * Return an upper bound on the number of sequences that can be produced by 91c03c5b1cSMartin Matuska * the long distance matcher, or 0 if LDM is disabled. 92c03c5b1cSMartin Matuska */ 93c03c5b1cSMartin Matuska size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); 94c03c5b1cSMartin Matuska 95c03c5b1cSMartin Matuska /** ZSTD_ldm_adjustParameters() : 96c03c5b1cSMartin Matuska * If the params->hashRateLog is not set, set it to its default value based on 97c03c5b1cSMartin Matuska * windowLog and params->hashLog. 98c03c5b1cSMartin Matuska * 99c03c5b1cSMartin Matuska * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to 100c03c5b1cSMartin Matuska * params->hashLog if it is not). 101c03c5b1cSMartin Matuska * 102c03c5b1cSMartin Matuska * Ensures that the minMatchLength >= targetLength during optimal parsing. 103c03c5b1cSMartin Matuska */ 104c03c5b1cSMartin Matuska void ZSTD_ldm_adjustParameters(ldmParams_t* params, 105c03c5b1cSMartin Matuska ZSTD_compressionParameters const* cParams); 106c03c5b1cSMartin Matuska 107c03c5b1cSMartin Matuska #if defined (__cplusplus) 108c03c5b1cSMartin Matuska } 109c03c5b1cSMartin Matuska #endif 110c03c5b1cSMartin Matuska 111c03c5b1cSMartin Matuska #endif /* ZSTD_FAST_H */ 112