xref: /linux/include/linux/zstd.h (revision 617a814f14b8914271f7a70366d72c6196d17663)
1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
2 /*
3  * Copyright (c) Yann Collet, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of https://github.com/facebook/zstd) and
8  * the GPLv2 (found in the COPYING file in the root directory of
9  * https://github.com/facebook/zstd). You may select, at your option, one of the
10  * above-listed licenses.
11  */
12 
13 #ifndef LINUX_ZSTD_H
14 #define LINUX_ZSTD_H
15 
16 /**
17  * This is a kernel-style API that wraps the upstream zstd API, which cannot be
18  * used directly because the symbols aren't exported. It exposes the minimal
19  * functionality which is currently required by users of zstd in the kernel.
20  * Expose extra functions from lib/zstd/zstd.h as needed.
21  */
22 
23 /* ======   Dependency   ====== */
24 #include <linux/types.h>
25 #include <linux/zstd_errors.h>
26 #include <linux/zstd_lib.h>
27 
28 /* ======   Helper Functions   ====== */
29 /**
30  * zstd_compress_bound() - maximum compressed size in worst case scenario
31  * @src_size: The size of the data to compress.
32  *
33  * Return:    The maximum compressed size in the worst case scenario.
34  */
35 size_t zstd_compress_bound(size_t src_size);
36 
37 /**
38  * zstd_is_error() - tells if a size_t function result is an error code
39  * @code:  The function result to check for error.
40  *
41  * Return: Non-zero iff the code is an error.
42  */
43 unsigned int zstd_is_error(size_t code);
44 
45 /**
46  * enum zstd_error_code - zstd error codes
47  */
48 typedef ZSTD_ErrorCode zstd_error_code;
49 
50 /**
51  * zstd_get_error_code() - translates an error function result to an error code
52  * @code:  The function result for which zstd_is_error(code) is true.
53  *
54  * Return: A unique error code for this error.
55  */
56 zstd_error_code zstd_get_error_code(size_t code);
57 
58 /**
59  * zstd_get_error_name() - translates an error function result to a string
60  * @code:  The function result for which zstd_is_error(code) is true.
61  *
62  * Return: An error string corresponding to the error code.
63  */
64 const char *zstd_get_error_name(size_t code);
65 
66 /**
67  * zstd_min_clevel() - minimum allowed compression level
68  *
69  * Return: The minimum allowed compression level.
70  */
71 int zstd_min_clevel(void);
72 
73 /**
74  * zstd_max_clevel() - maximum allowed compression level
75  *
76  * Return: The maximum allowed compression level.
77  */
78 int zstd_max_clevel(void);
79 
80 /**
81  * zstd_default_clevel() - default compression level
82  *
83  * Return: Default compression level.
84  */
85 int zstd_default_clevel(void);
86 
87 /**
88  * struct zstd_custom_mem - custom memory allocation
89  */
90 typedef ZSTD_customMem zstd_custom_mem;
91 
92 /**
93  * enum zstd_dict_load_method - Dictionary load method.
94  * See zstd_lib.h.
95  */
96 typedef ZSTD_dictLoadMethod_e zstd_dict_load_method;
97 
98 /**
99  * enum zstd_dict_content_type - Dictionary content type.
100  * See zstd_lib.h.
101  */
102 typedef ZSTD_dictContentType_e zstd_dict_content_type;
103 
104 /* ======   Parameter Selection   ====== */
105 
106 /**
107  * enum zstd_strategy - zstd compression search strategy
108  *
109  * From faster to stronger. See zstd_lib.h.
110  */
111 typedef ZSTD_strategy zstd_strategy;
112 
113 /**
114  * struct zstd_compression_parameters - zstd compression parameters
115  * @windowLog:    Log of the largest match distance. Larger means more
116  *                compression, and more memory needed during decompression.
117  * @chainLog:     Fully searched segment. Larger means more compression,
118  *                slower, and more memory (useless for fast).
119  * @hashLog:      Dispatch table. Larger means more compression,
120  *                slower, and more memory.
121  * @searchLog:    Number of searches. Larger means more compression and slower.
122  * @searchLength: Match length searched. Larger means faster decompression,
123  *                sometimes less compression.
124  * @targetLength: Acceptable match size for optimal parser (only). Larger means
125  *                more compression, and slower.
126  * @strategy:     The zstd compression strategy.
127  *
128  * See zstd_lib.h.
129  */
130 typedef ZSTD_compressionParameters zstd_compression_parameters;
131 
132 /**
133  * struct zstd_frame_parameters - zstd frame parameters
134  * @contentSizeFlag: Controls whether content size will be present in the
135  *                   frame header (when known).
136  * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
137  *                   end of the frame for error detection.
138  * @noDictIDFlag:    Controls whether dictID will be saved into the frame
139  *                   header when using dictionary compression.
140  *
141  * The default value is all fields set to 0. See zstd_lib.h.
142  */
143 typedef ZSTD_frameParameters zstd_frame_parameters;
144 
145 /**
146  * struct zstd_parameters - zstd parameters
147  * @cParams: The compression parameters.
148  * @fParams: The frame parameters.
149  */
150 typedef ZSTD_parameters zstd_parameters;
151 
152 /**
153  * zstd_get_params() - returns zstd_parameters for selected level
154  * @level:              The compression level
155  * @estimated_src_size: The estimated source size to compress or 0
156  *                      if unknown.
157  *
158  * Return:              The selected zstd_parameters.
159  */
160 zstd_parameters zstd_get_params(int level,
161 	unsigned long long estimated_src_size);
162 
163 
164 /**
165  * zstd_get_cparams() - returns zstd_compression_parameters for selected level
166  * @level:              The compression level
167  * @estimated_src_size: The estimated source size to compress or 0
168  *                      if unknown.
169  * @dict_size:          Dictionary size.
170  *
171  * Return:              The selected zstd_compression_parameters.
172  */
173 zstd_compression_parameters zstd_get_cparams(int level,
174 	unsigned long long estimated_src_size, size_t dict_size);
175 
176 /* ======   Single-pass Compression   ====== */
177 
178 typedef ZSTD_CCtx zstd_cctx;
179 
180 /**
181  * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
182  * @parameters: The compression parameters to be used.
183  *
184  * If multiple compression parameters might be used, the caller must call
185  * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
186  * size.
187  *
188  * Return:      A lower bound on the size of the workspace that is passed to
189  *              zstd_init_cctx().
190  */
191 size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
192 
193 /**
194  * zstd_init_cctx() - initialize a zstd compression context
195  * @workspace:      The workspace to emplace the context into. It must outlive
196  *                  the returned context.
197  * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
198  *                  determine how large the workspace must be.
199  *
200  * Return:          A zstd compression context or NULL on error.
201  */
202 zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
203 
204 /**
205  * zstd_compress_cctx() - compress src into dst using the given parameters
206  * @cctx:         The context. Must have been initialized with zstd_init_cctx().
207  * @dst:          The buffer to compress src into.
208  * @dst_capacity: The size of the destination buffer. May be any size, but
209  *                zstd_compress_bound(src_size) is guaranteed to be sufficient.
210  * @src:          The data to compress.
211  * @src_size:     The size of the data to compress.
212  * @parameters:   The compression parameters to be used.
213  *
214  * Return:        The compressed size or an error, which can be checked using
215  *                zstd_is_error().
216  */
217 size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
218 	const void *src, size_t src_size, const zstd_parameters *parameters);
219 
220 /**
221  * zstd_create_cctx_advanced() - Create compression context
222  * @custom_mem:   Custom allocator.
223  *
224  * Return:        NULL on error, pointer to compression context otherwise.
225  */
226 zstd_cctx *zstd_create_cctx_advanced(zstd_custom_mem custom_mem);
227 
228 /**
229  * zstd_free_cctx() - Free compression context
230  * @cctx:         Pointer to compression context.
231  *
232  * Return:        Always 0.
233  */
234 size_t zstd_free_cctx(zstd_cctx *cctx);
235 
236 /**
237  * struct zstd_cdict - Compression dictionary.
238  * See zstd_lib.h.
239  */
240 typedef ZSTD_CDict zstd_cdict;
241 
242 /**
243  * zstd_create_cdict_byreference() - Create compression dictionary
244  * @dict:              Pointer to dictionary buffer.
245  * @dict_size:         Size of the dictionary buffer.
246  * @cparams:           The compression parameters to be used.
247  * @custom_mem:        Memory allocator.
248  *
249  * Note, this uses @dict by reference (ZSTD_dlm_byRef), so the @dict buffer
250  * must stay valid for the whole lifetime of the zstd_cdict and may only be
251  * freed after the zstd_cdict has been destroyed.
252  *
253  * Return:             NULL on error, pointer to compression dictionary
254  *                     otherwise.
255  */
256 zstd_cdict *zstd_create_cdict_byreference(const void *dict, size_t dict_size,
257 					  zstd_compression_parameters cparams,
258 					  zstd_custom_mem custom_mem);
259 
260 /**
261  * zstd_free_cdict() - Free compression dictionary
262  * @cdict:        Pointer to compression dictionary.
263  *
264  * Return:        Always 0.
265  */
266 size_t zstd_free_cdict(zstd_cdict *cdict);
267 
268 /**
269  * zstd_compress_using_cdict() - compress src into dst using a dictionary
270  * @cctx:         The context. Must have been initialized with zstd_init_cctx().
271  * @dst:          The buffer to compress src into.
272  * @dst_capacity: The size of the destination buffer. May be any size, but
273  *                zstd_compress_bound(src_size) is guaranteed to be sufficient.
274  * @src:          The data to compress.
275  * @src_size:     The size of the data to compress.
276  * @cdict:        The dictionary to be used.
277  *
278  * Return:        The compressed size or an error, which can be checked using
279  *                zstd_is_error().
280  */
281 size_t zstd_compress_using_cdict(zstd_cctx *cctx, void *dst,
282 	size_t dst_capacity, const void *src, size_t src_size,
283 	const zstd_cdict *cdict);
284 
285 /* ======   Single-pass Decompression   ====== */
286 
287 typedef ZSTD_DCtx zstd_dctx;
288 
289 /**
290  * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
291  *
292  * Return: A lower bound on the size of the workspace that is passed to
293  *         zstd_init_dctx().
294  */
295 size_t zstd_dctx_workspace_bound(void);
296 
297 /**
298  * zstd_init_dctx() - initialize a zstd decompression context
299  * @workspace:      The workspace to emplace the context into. It must outlive
300  *                  the returned context.
301  * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
302  *                  determine how large the workspace must be.
303  *
304  * Return:          A zstd decompression context or NULL on error.
305  */
306 zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
307 
308 /**
309  * zstd_decompress_dctx() - decompress zstd compressed src into dst
310  * @dctx:         The decompression context.
311  * @dst:          The buffer to decompress src into.
312  * @dst_capacity: The size of the destination buffer. Must be at least as large
313  *                as the decompressed size. If the caller cannot upper bound the
314  *                decompressed size, then it's better to use the streaming API.
315  * @src:          The zstd compressed data to decompress. Multiple concatenated
316  *                frames and skippable frames are allowed.
317  * @src_size:     The exact size of the data to decompress.
318  *
319  * Return:        The decompressed size or an error, which can be checked using
320  *                zstd_is_error().
321  */
322 size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
323 	const void *src, size_t src_size);
324 
325 /**
326  * struct zstd_ddict - Decompression dictionary.
327  * See zstd_lib.h.
328  */
329 typedef ZSTD_DDict zstd_ddict;
330 
331 /**
332  * zstd_create_ddict_byreference() - Create decompression dictionary
333  * @dict:              Pointer to dictionary buffer.
334  * @dict_size:         Size of the dictionary buffer.
335  * @custom_mem:        Memory allocator.
336  *
337  * Note, this uses @dict by reference (ZSTD_dlm_byRef), i.e. the dictionary
338  * buffer is referenced rather than copied. The @dict buffer must therefore
339  * stay valid for the whole lifetime of the zstd_ddict and may only be freed
340  * after the zstd_ddict has been destroyed.
341  *
342  * Return:             NULL on error, pointer to decompression dictionary
343  *                     otherwise.
344  */
345 zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size,
346 					  zstd_custom_mem custom_mem);
347 /**
348  * zstd_free_ddict() - Free decompression dictionary
349  * @ddict:        Pointer to decompression dictionary.
350  *
351  * Return:        Always 0.
352  */
353 size_t zstd_free_ddict(zstd_ddict *ddict);
354 
355 /**
356  * zstd_create_dctx_advanced() - Create decompression context
357  * @custom_mem:   Custom allocator.
358  *
359  * Return:        NULL on error, pointer to decompression context otherwise.
360  */
361 zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem);
362 
363 /**
364  * zstd_free_dctx() - Free decompression context
365  * @dctx:         Pointer to decompression context.
366  * Return:        Always 0.
367  */
368 size_t zstd_free_dctx(zstd_dctx *dctx);
369 
370 /**
371  * zstd_decompress_using_ddict() - decompress src into dst using a dictionary
372  * @dctx:         The decompression context.
373  * @dst:          The buffer to decompress src into.
374  * @dst_capacity: The size of the destination buffer. Must be at least as large
375  *                as the decompressed size. If the caller cannot upper bound the
376  *                decompressed size, then it's better to use the streaming API.
377  * @src:          The zstd compressed data to decompress. Multiple concatenated
378  *                frames and skippable frames are allowed.
379  * @src_size:     The exact size of the data to decompress.
380  * @ddict:        The dictionary to be used.
381  *
382  * Return:        The decompressed size or an error, which can be checked using
383  *                zstd_is_error().
384  */
385 size_t zstd_decompress_using_ddict(zstd_dctx *dctx,
386 	void *dst, size_t dst_capacity, const void *src, size_t src_size,
387 	const zstd_ddict *ddict);
388 
389 
390 /* ======   Streaming Buffers   ====== */
391 
392 /**
393  * struct zstd_in_buffer - input buffer for streaming
394  * @src:  Start of the input buffer.
395  * @size: Size of the input buffer.
396  * @pos:  Position where reading stopped. Will be updated.
397  *        Necessarily 0 <= pos <= size.
398  *
399  * See zstd_lib.h.
400  */
401 typedef ZSTD_inBuffer zstd_in_buffer;
402 
403 /**
404  * struct zstd_out_buffer - output buffer for streaming
405  * @dst:  Start of the output buffer.
406  * @size: Size of the output buffer.
407  * @pos:  Position where writing stopped. Will be updated.
408  *        Necessarily 0 <= pos <= size.
409  *
410  * See zstd_lib.h.
411  */
412 typedef ZSTD_outBuffer zstd_out_buffer;
413 
414 /* ======   Streaming Compression   ====== */
415 
416 typedef ZSTD_CStream zstd_cstream;
417 
418 /**
419  * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
420  * @cparams: The compression parameters to be used for compression.
421  *
422  * Return:   A lower bound on the size of the workspace that is passed to
423  *           zstd_init_cstream().
424  */
425 size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
426 
427 /**
428  * zstd_init_cstream() - initialize a zstd streaming compression context
429  * @parameters:       The zstd parameters to use for compression.
430  * @pledged_src_size: If parameters->fParams.contentSizeFlag == 1 then the
431  *                    caller must pass the source size (zero means empty
432  *                    source). Otherwise, the caller may optionally pass the
433  *                    source size, or zero if unknown.
434  * @workspace:        The workspace to emplace the context into. It must outlive
435  *                    the returned context.
436  * @workspace_size:   The size of workspace.
437  *                    Use zstd_cstream_workspace_bound(&parameters->cParams)
438  *                    to determine how large the workspace must be.
439  *
440  * Return:            The zstd streaming compression context or NULL on error.
441  */
442 zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
443 	unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
444 
445 /**
446  * zstd_reset_cstream() - reset the context using parameters from creation
447  * @cstream:          The zstd streaming compression context to reset.
448  * @pledged_src_size: Optionally the source size, or zero if unknown.
449  *
450  * Resets the context using the parameters from creation. Skips dictionary
451  * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
452  * content size is always written into the frame header.
453  *
454  * Return:            Zero or an error, which can be checked using
455  *                    zstd_is_error().
456  */
457 size_t zstd_reset_cstream(zstd_cstream *cstream,
458 	unsigned long long pledged_src_size);
459 
460 /**
461  * zstd_compress_stream() - streaming compress some of input into output
462  * @cstream: The zstd streaming compression context.
463  * @output:  Destination buffer. `output->pos` is updated to indicate how much
464  *           compressed data was written.
465  * @input:   Source buffer. `input->pos` is updated to indicate how much data
466  *           was read. Note that it may not consume the entire input, in which
467  *           case `input->pos < input->size`, and it's up to the caller to
468  *           present remaining data again.
469  *
470  * The `input` and `output` buffers may be any size. Guaranteed to make some
471  * forward progress if `input` and `output` are not empty.
472  *
473  * Return:   A hint for the number of bytes to use as the input for the next
474  *           function call or an error, which can be checked using
475  *           zstd_is_error().
476  */
477 size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
478 	zstd_in_buffer *input);
479 
480 /**
481  * zstd_flush_stream() - flush internal buffers into output
482  * @cstream: The zstd streaming compression context.
483  * @output:  Destination buffer. `output->pos` is updated to indicate how much
484  *           compressed data was written.
485  *
486  * zstd_flush_stream() must be called until it returns 0, meaning all the data
487  * has been flushed. Since zstd_flush_stream() causes a block to be ended,
488  * calling it too often will degrade the compression ratio.
489  *
490  * Return:   The number of bytes still present within internal buffers or an
491  *           error, which can be checked using zstd_is_error().
492  */
493 size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
494 
495 /**
496  * zstd_end_stream() - flush internal buffers into output and end the frame
497  * @cstream: The zstd streaming compression context.
498  * @output:  Destination buffer. `output->pos` is updated to indicate how much
499  *           compressed data was written.
500  *
501  * zstd_end_stream() must be called until it returns 0, meaning all the data has
502  * been flushed and the frame epilogue has been written.
503  *
504  * Return:   The number of bytes still present within internal buffers or an
505  *           error, which can be checked using zstd_is_error().
506  */
507 size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
508 
509 /* ======   Streaming Decompression   ====== */
510 
511 typedef ZSTD_DStream zstd_dstream;
512 
513 /**
514  * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
515  * @max_window_size: The maximum window size allowed for compressed frames.
516  *
517  * Return:           A lower bound on the size of the workspace that is passed
518  *                   to zstd_init_dstream().
519  */
520 size_t zstd_dstream_workspace_bound(size_t max_window_size);
521 
522 /**
523  * zstd_init_dstream() - initialize a zstd streaming decompression context
524  * @max_window_size: The maximum window size allowed for compressed frames.
525  * @workspace:       The workspace to emplace the context into. It must outlive
526  *                   the returned context.
527  * @workspace_size:  The size of workspace.
528  *                   Use zstd_dstream_workspace_bound(max_window_size) to
529  *                   determine how large the workspace must be.
530  *
531  * Return:           The zstd streaming decompression context.
532  */
533 zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
534 	size_t workspace_size);
535 
536 /**
537  * zstd_reset_dstream() - reset the context using parameters from creation
538  * @dstream: The zstd streaming decompression context to reset.
539  *
540  * Resets the context using the parameters from creation. Skips dictionary
541  * loading, since it can be reused.
542  *
543  * Return:   Zero or an error, which can be checked using zstd_is_error().
544  */
545 size_t zstd_reset_dstream(zstd_dstream *dstream);
546 
547 /**
548  * zstd_decompress_stream() - streaming decompress some of input into output
549  * @dstream: The zstd streaming decompression context.
550  * @output:  Destination buffer. `output->pos` is updated to indicate how much
551  *           decompressed data was written.
552  * @input:   Source buffer. `input->pos` is updated to indicate how much data
553  *           was read. Note that it may not consume the entire input, in which
554  *           case `input->pos < input->size`, and it's up to the caller to
555  *           present remaining data again.
556  *
557  * The `input` and `output` buffers may be any size. Guaranteed to make some
558  * forward progress if `input` and `output` are not empty.
559  * zstd_decompress_stream() will not consume the last byte of the frame until
560  * the entire frame is flushed.
561  *
562  * Return:   Returns 0 iff a frame is completely decoded and fully flushed.
563  *           Otherwise returns a hint for the number of bytes to use as the
564  *           input for the next function call or an error, which can be checked
565  *           using zstd_is_error(). The size hint will never load more than the
566  *           frame.
567  */
568 size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
569 	zstd_in_buffer *input);
570 
571 /* ======   Frame Inspection Functions ====== */
572 
573 /**
574  * zstd_find_frame_compressed_size() - returns the size of a compressed frame
575  * @src:      Source buffer. It should point to the start of a zstd encoded
576  *            frame or a skippable frame.
577  * @src_size: The size of the source buffer. It must be at least as large as the
578  *            size of the frame.
579  *
580  * Return:    The compressed size of the frame pointed to by `src` or an error,
581  *            which can be checked with zstd_is_error().
582  *            Suitable to pass to zstd_decompress_dctx() or similar functions.
583  */
584 size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
585 
586 /**
587  * struct zstd_frame_header - zstd frame parameters stored in the frame header
588  * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
589  *                    present.
590  * @windowSize:       The window size, or 0 if the frame is a skippable frame.
591  * @blockSizeMax:     The maximum block size.
592  * @frameType:        The frame type (zstd or skippable)
593  * @headerSize:       The size of the frame header.
594  * @dictID:           The dictionary id, or 0 if not present.
595  * @checksumFlag:     Whether a checksum was used.
596  *
597  * See zstd_lib.h.
598  */
599 typedef ZSTD_frameHeader zstd_frame_header;
600 
601 /**
602  * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
603  * @params:   On success the frame parameters are written here.
604  * @src:      The source buffer. It must point to a zstd or skippable frame.
605  * @src_size: The size of the source buffer.
606  *
607  * Return:    0 on success. If more data is required it returns how many bytes
608  *            must be provided to make forward progress. Otherwise it returns
609  *            an error, which can be checked using zstd_is_error().
610  */
611 size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
612 	size_t src_size);
613 
614 #endif  /* LINUX_ZSTD_H */
615