xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_ldm.h (revision 19fcbaf1424b464269f1a7621fab747bb75afc36)
10c16b537SWarner Losh /*
20c16b537SWarner Losh  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
30c16b537SWarner Losh  * All rights reserved.
40c16b537SWarner Losh  *
50c16b537SWarner Losh  * This source code is licensed under both the BSD-style license (found in the
60c16b537SWarner Losh  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
70c16b537SWarner Losh  * in the COPYING file in the root directory of this source tree).
80c16b537SWarner Losh  */
90c16b537SWarner Losh 
100c16b537SWarner Losh #ifndef ZSTD_LDM_H
110c16b537SWarner Losh #define ZSTD_LDM_H
120c16b537SWarner Losh 
130c16b537SWarner Losh #if defined (__cplusplus)
140c16b537SWarner Losh extern "C" {
150c16b537SWarner Losh #endif
160c16b537SWarner Losh 
17052d3c12SConrad Meyer #include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
18052d3c12SConrad Meyer #include "zstd.h"   /* ZSTD_CCtx, size_t */
19052d3c12SConrad Meyer 
200c16b537SWarner Losh /*-*************************************
210c16b537SWarner Losh *  Long distance matching
220c16b537SWarner Losh ***************************************/
230c16b537SWarner Losh 
240c16b537SWarner Losh #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX   /* default LDM window log: alias of ZSTD_WINDOWLOG_DEFAULTMAX */
250c16b537SWarner Losh 
26*19fcbaf1SConrad Meyer /**
27*19fcbaf1SConrad Meyer  * ZSTD_ldm_generateSequences():
280c16b537SWarner Losh  *
29*19fcbaf1SConrad Meyer  * Uses the long distance match finder to generate
30*19fcbaf1SConrad Meyer  * long range matching sequences in `sequences`, which parse a prefix
31*19fcbaf1SConrad Meyer  * of the source. `sequences` must be large enough to store every sequence,
32*19fcbaf1SConrad Meyer  * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
33*19fcbaf1SConrad Meyer  * @return 0 or an error code.
340c16b537SWarner Losh  *
35*19fcbaf1SConrad Meyer  * NOTE: The user must have called ZSTD_window_update() for all of the input
36*19fcbaf1SConrad Meyer  * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
37*19fcbaf1SConrad Meyer  * NOTE: This function returns an error if it runs out of space to store
38*19fcbaf1SConrad Meyer  *       sequences.
39*19fcbaf1SConrad Meyer  */
40*19fcbaf1SConrad Meyer size_t ZSTD_ldm_generateSequences(
41*19fcbaf1SConrad Meyer             ldmState_t* ldms, rawSeqStore_t* sequences,
42*19fcbaf1SConrad Meyer             ldmParams_t const* params, void const* src, size_t srcSize);
430c16b537SWarner Losh 
44*19fcbaf1SConrad Meyer /**
45*19fcbaf1SConrad Meyer  * ZSTD_ldm_blockCompress():
46*19fcbaf1SConrad Meyer  *
47*19fcbaf1SConrad Meyer  * Compresses a block using the predefined sequences, along with a secondary
48*19fcbaf1SConrad Meyer  * block compressor. The literals section of every sequence is passed to the
49*19fcbaf1SConrad Meyer  * secondary block compressor, and those sequences are interspersed with the
50*19fcbaf1SConrad Meyer  * predefined sequences.
51*19fcbaf1SConrad Meyer  * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
52*19fcbaf1SConrad Meyer  * `rawSeqStore.seq` may also be updated to split the last sequence between two
53*19fcbaf1SConrad Meyer  * blocks.
54*19fcbaf1SConrad Meyer  * @return The length of the last literals.
55*19fcbaf1SConrad Meyer  *
56*19fcbaf1SConrad Meyer  * NOTE: The source must be at most the maximum block size, but the predefined
57*19fcbaf1SConrad Meyer  * sequences can be any size, and may be longer than the block. In the case that
58*19fcbaf1SConrad Meyer  * they are longer than the block, the last sequence may need to be split into
59*19fcbaf1SConrad Meyer  * two. We handle that case correctly, and update `rawSeqStore` appropriately.
60*19fcbaf1SConrad Meyer  * NOTE: This function does not return any errors.
61*19fcbaf1SConrad Meyer  */
62*19fcbaf1SConrad Meyer size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
63*19fcbaf1SConrad Meyer             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
64*19fcbaf1SConrad Meyer             ZSTD_compressionParameters const* cParams,
65*19fcbaf1SConrad Meyer             void const* src, size_t srcSize,
66*19fcbaf1SConrad Meyer             int const extDict);
67*19fcbaf1SConrad Meyer 
68*19fcbaf1SConrad Meyer /**
69*19fcbaf1SConrad Meyer  * ZSTD_ldm_skipSequences():
70*19fcbaf1SConrad Meyer  *
71*19fcbaf1SConrad Meyer  * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
72*19fcbaf1SConrad Meyer  * Avoids emitting matches less than `minMatch` bytes.
73*19fcbaf1SConrad Meyer  * Must be called for data which is not passed to ZSTD_ldm_blockCompress().
74*19fcbaf1SConrad Meyer  */
75*19fcbaf1SConrad Meyer void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
76*19fcbaf1SConrad Meyer     U32 const minMatch);
77*19fcbaf1SConrad Meyer 
780c16b537SWarner Losh 
790c16b537SWarner Losh /** ZSTD_ldm_getTableSize() :
80*19fcbaf1SConrad Meyer  *  Estimate the space needed for the long distance matching tables, or return
81*19fcbaf1SConrad Meyer  *  0 if LDM is disabled.
82*19fcbaf1SConrad Meyer  */
83*19fcbaf1SConrad Meyer size_t ZSTD_ldm_getTableSize(ldmParams_t params);
84*19fcbaf1SConrad Meyer 
85*19fcbaf1SConrad Meyer /** ZSTD_ldm_getMaxNbSeq() :
86*19fcbaf1SConrad Meyer  *  Return an upper bound on the number of sequences that can be produced by
87*19fcbaf1SConrad Meyer  *  the long distance matcher, or 0 if LDM is disabled.
88*19fcbaf1SConrad Meyer  */
89*19fcbaf1SConrad Meyer size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
900c16b537SWarner Losh 
910c16b537SWarner Losh /** ZSTD_ldm_getHashPower() :
920c16b537SWarner Losh  *  Return prime8bytes^(minMatchLength-1) */
930c16b537SWarner Losh U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
940c16b537SWarner Losh 
950c16b537SWarner Losh /** ZSTD_ldm_adjustParameters() :
960c16b537SWarner Losh  *  If params->hashEveryLog is not set, set it to its default value based on
970c16b537SWarner Losh  *  windowLog and params->hashLog.
980c16b537SWarner Losh  *
990c16b537SWarner Losh  *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
100*19fcbaf1SConrad Meyer  *  params->hashLog if it is not).
101*19fcbaf1SConrad Meyer  *
102*19fcbaf1SConrad Meyer  *  Ensures that minMatchLength >= targetLength during optimal parsing.
103*19fcbaf1SConrad Meyer  */
104*19fcbaf1SConrad Meyer void ZSTD_ldm_adjustParameters(ldmParams_t* params,
105*19fcbaf1SConrad Meyer                                ZSTD_compressionParameters const* cParams);
1060c16b537SWarner Losh 
1070c16b537SWarner Losh #if defined (__cplusplus)
1080c16b537SWarner Losh }
1090c16b537SWarner Losh #endif
1100c16b537SWarner Losh 
1110c16b537SWarner Losh #endif /* ZSTD_LDM_H */
112