1 /*-
2 * Copyright (c) 2017 Sean Purcell
3 * Copyright (c) 2023-2024 Klara, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "archive_platform.h"
28
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #ifdef HAVE_LIMITS_H
33 #include <limits.h>
34 #endif
35 #ifdef HAVE_STDINT_H
36 #include <stdint.h>
37 #endif
38 #ifdef HAVE_STDLIB_H
39 #include <stdlib.h>
40 #endif
41 #ifdef HAVE_STRING_H
42 #include <string.h>
43 #endif
44 #ifdef HAVE_UNISTD_H
45 #include <unistd.h>
46 #endif
47 #ifdef HAVE_ZSTD_H
48 #include <zstd.h>
49 #endif
50
51 #include "archive.h"
52 #include "archive_private.h"
53 #include "archive_string.h"
54 #include "archive_write_private.h"
55
/*
 * Without zstd.h (or without ZSTD_compressStream) this filter is still
 * compiled; it then pipes the data through the external zstd program.
 */
57
/*
 * Per-filter state, stored in the filter's data pointer.
 */
struct private_data {
	/* Value of the "compression-level" option (CLEVEL_DEFAULT if unset). */
	int		 compression_level;
	/* Value of the "threads" option; 0 if unset. */
	int		 threads;
	/* Value of the "long" option; 0 if unset. */
	int		 long_distance;
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
	/*
	 * Stage of the compression loop in drive_compressor():
	 *   running   - feeding input to ZSTD_compressStream()
	 *   finishing - ending the current frame with ZSTD_endStream()
	 *   resetting - resetting the session before the next frame
	 */
	enum {
		running,
		finishing,
		resetting,
	} state;
	/* Non-zero once "frame-per-file" was requested: flush may end a frame. */
	int		 frame_per_file;
	/* A flush ends the frame only if the frame has consumed more than this. */
	size_t		 min_frame_in;
	/* The frame is ended once it has consumed at least this many bytes. */
	size_t		 max_frame_in;
	/* A flush ends the frame only if the frame has produced more than this. */
	size_t		 min_frame_out;
	/* The frame is ended once it has produced at least this many bytes. */
	size_t		 max_frame_out;
	/* Incremented each time a frame has been finished and the session reset. */
	size_t		 cur_frame;
	/* Input bytes consumed by the current frame. */
	size_t		 cur_frame_in;
	/* Output bytes produced by the current frame. */
	size_t		 cur_frame_out;
	/* Input bytes consumed over all frames. */
	size_t		 total_in;
	/* Compressor handle from ZSTD_createCStream(). */
	ZSTD_CStream	*cstream;
	/* Output staging buffer; out.dst is malloc'ed in the open callback. */
	ZSTD_outBuffer	 out;
#else
	/* State of the external "zstd" program used instead of the library. */
	struct archive_write_program_data *pdata;
#endif
};
83
/* If we don't have the library use default range values (zstdcli.c v1.4.0) */
#define CLEVEL_MIN -99
#define CLEVEL_STD_MIN 0 /* prior to 1.3.4 and more recent without using --fast */
#define CLEVEL_DEFAULT 3
#define CLEVEL_STD_MAX 19 /* without using --ultra */
#define CLEVEL_MAX 22

/* NOTE(review): LONG_STD is not referenced in the visible part of this file. */
#define LONG_STD 27

/*
 * Library version thresholds, compared against ZSTD_VERSION_NUMBER and
 * ZSTD_versionNumber():
 *   MINVER_NEGCLEVEL - below this the minimum level is CLEVEL_STD_MIN
 *   MINVER_MINCLEVEL - from this on ZSTD_minCLevel() supplies the minimum
 *   MINVER_LONG      - from this on the "long" option is applied as
 *                      ZSTD_c_windowLog
 */
#define MINVER_NEGCLEVEL 10304
#define MINVER_MINCLEVEL 10306
#define MINVER_LONG 10302
96
/*
 * Filter callbacks.  Both the library-backed and the external-program
 * builds define the same set; drive_compressor() exists only when the
 * library is available.
 */
static int archive_compressor_zstd_options(struct archive_write_filter *,
		    const char *, const char *);
static int archive_compressor_zstd_open(struct archive_write_filter *);
static int archive_compressor_zstd_write(struct archive_write_filter *,
		    const void *, size_t);
static int archive_compressor_zstd_flush(struct archive_write_filter *);
static int archive_compressor_zstd_close(struct archive_write_filter *);
static int archive_compressor_zstd_free(struct archive_write_filter *);
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
static int drive_compressor(struct archive_write_filter *,
		    struct private_data *, int, const void *, size_t);
#endif
109
110
111 /*
112 * Add a zstd compression filter to this write handle.
113 */
114 int
archive_write_add_filter_zstd(struct archive * _a)115 archive_write_add_filter_zstd(struct archive *_a)
116 {
117 struct archive_write *a = (struct archive_write *)_a;
118 struct archive_write_filter *f = __archive_write_allocate_filter(_a);
119 struct private_data *data;
120 archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
121 ARCHIVE_STATE_NEW, "archive_write_add_filter_zstd");
122
123 data = calloc(1, sizeof(*data));
124 if (data == NULL) {
125 archive_set_error(&a->archive, ENOMEM, "Out of memory");
126 return (ARCHIVE_FATAL);
127 }
128 f->data = data;
129 f->open = &archive_compressor_zstd_open;
130 f->options = &archive_compressor_zstd_options;
131 f->flush = &archive_compressor_zstd_flush;
132 f->close = &archive_compressor_zstd_close;
133 f->free = &archive_compressor_zstd_free;
134 f->code = ARCHIVE_FILTER_ZSTD;
135 f->name = "zstd";
136 data->compression_level = CLEVEL_DEFAULT;
137 data->threads = 0;
138 data->long_distance = 0;
139 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
140 data->frame_per_file = 0;
141 data->min_frame_in = 0;
142 data->max_frame_in = SIZE_MAX;
143 data->min_frame_out = 0;
144 data->max_frame_out = SIZE_MAX;
145 data->cur_frame_in = 0;
146 data->cur_frame_out = 0;
147 data->cstream = ZSTD_createCStream();
148 if (data->cstream == NULL) {
149 free(data);
150 archive_set_error(&a->archive, ENOMEM,
151 "Failed to allocate zstd compressor object");
152 return (ARCHIVE_FATAL);
153 }
154
155 return (ARCHIVE_OK);
156 #else
157 data->pdata = __archive_write_program_allocate("zstd");
158 if (data->pdata == NULL) {
159 free(data);
160 archive_set_error(&a->archive, ENOMEM, "Out of memory");
161 return (ARCHIVE_FATAL);
162 }
163 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
164 "Using external zstd program");
165 return (ARCHIVE_WARN);
166 #endif
167 }
168
169 static int
archive_compressor_zstd_free(struct archive_write_filter * f)170 archive_compressor_zstd_free(struct archive_write_filter *f)
171 {
172 struct private_data *data = (struct private_data *)f->data;
173 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
174 ZSTD_freeCStream(data->cstream);
175 free(data->out.dst);
176 #else
177 __archive_write_program_free(data->pdata);
178 #endif
179 free(data);
180 f->data = NULL;
181 return (ARCHIVE_OK);
182 }
183
184 static int
string_to_number(const char * string,intmax_t * numberp)185 string_to_number(const char *string, intmax_t *numberp)
186 {
187 char *end;
188
189 if (string == NULL || *string == '\0')
190 return (ARCHIVE_WARN);
191 *numberp = strtoimax(string, &end, 10);
192 if (end == string || *end != '\0' || errno == EOVERFLOW) {
193 *numberp = 0;
194 return (ARCHIVE_WARN);
195 }
196 return (ARCHIVE_OK);
197 }
198
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
/*
 * Parse a non-negative decimal size with an optional binary suffix:
 * K, M or G (either case), optionally followed by B or b.
 *
 * Returns ARCHIVE_OK and stores the byte count in *numberp on success.
 * Returns ARCHIVE_WARN, leaving *numberp untouched, for a NULL or empty
 * string, a negative number, trailing garbage, or a value that does not
 * fit in size_t.
 */
static int
string_to_size(const char *string, size_t *numberp)
{
	const char *p;
	uintmax_t number;
	char *end;
	unsigned int shift = 0;

	if (string == NULL || *string == '\0')
		return (ARCHIVE_WARN);
	/*
	 * strtoumax() skips leading white space and silently negates a
	 * value written with '-', so look for the sign where the number
	 * actually starts rather than only at string[0].
	 */
	p = string;
	while (*p == ' ' || (*p >= '\t' && *p <= '\r'))
		p++;
	if (*p == '-')
		return (ARCHIVE_WARN);

	/* Overflow is reported only through errno (ERANGE); clear it first. */
	errno = 0;
	number = strtoumax(string, &end, 10);
	if (end == string || errno == ERANGE)
		return (ARCHIVE_WARN);

	switch (*end) {
	case 'K':
	case 'k':
		shift = 10;
		end++;
		break;
	case 'M':
	case 'm':
		shift = 20;
		end++;
		break;
	case 'G':
	case 'g':
		shift = 30;
		end++;
		break;
	default:
		break;
	}
	if (*end == 'B' || *end == 'b')
		end++;
	if (*end != '\0')
		return (ARCHIVE_WARN);

	/* Reject values that would overflow size_t once scaled. */
	if (number > (uintmax_t)SIZE_MAX >> shift)
		return (ARCHIVE_WARN);
	*numberp = (size_t)(number << shift);
	return (ARCHIVE_OK);
}
#endif
235
236 /*
237 * Set write options.
238 */
239 static int
archive_compressor_zstd_options(struct archive_write_filter * f,const char * key,const char * value)240 archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
241 const char *value)
242 {
243 struct private_data *data = (struct private_data *)f->data;
244
245 if (strcmp(key, "compression-level") == 0) {
246 intmax_t level;
247 if (string_to_number(value, &level) != ARCHIVE_OK) {
248 return (ARCHIVE_WARN);
249 }
250 /* If we don't have the library, hard-code the max level */
251 int minimum = CLEVEL_MIN;
252 int maximum = CLEVEL_MAX;
253 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
254 maximum = ZSTD_maxCLevel();
255 #if ZSTD_VERSION_NUMBER >= MINVER_MINCLEVEL
256 if (ZSTD_versionNumber() >= MINVER_MINCLEVEL) {
257 minimum = ZSTD_minCLevel();
258 }
259 else
260 #endif
261 if (ZSTD_versionNumber() < MINVER_NEGCLEVEL) {
262 minimum = CLEVEL_STD_MIN;
263 }
264 #endif
265 if (level < minimum || level > maximum) {
266 return (ARCHIVE_WARN);
267 }
268 data->compression_level = (int)level;
269 return (ARCHIVE_OK);
270 } else if (strcmp(key, "threads") == 0) {
271 intmax_t threads;
272 if (string_to_number(value, &threads) != ARCHIVE_OK) {
273 return (ARCHIVE_WARN);
274 }
275
276 #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
277 if (threads == 0) {
278 threads = sysconf(_SC_NPROCESSORS_ONLN);
279 }
280 #elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && \
281 _WIN32_WINNT >= 0x0601 /* _WIN32_WINNT_WIN7 */
282 if (threads == 0) {
283 DWORD winCores = GetActiveProcessorCount(
284 ALL_PROCESSOR_GROUPS);
285 threads = (intmax_t)winCores;
286 }
287 #endif
288 if (threads < 0 || threads > INT_MAX) {
289 return (ARCHIVE_WARN);
290 }
291 data->threads = (int)threads;
292 return (ARCHIVE_OK);
293 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
294 } else if (strcmp(key, "frame-per-file") == 0) {
295 data->frame_per_file = 1;
296 return (ARCHIVE_OK);
297 } else if (strcmp(key, "min-frame-in") == 0) {
298 if (string_to_size(value, &data->min_frame_in) != ARCHIVE_OK) {
299 return (ARCHIVE_WARN);
300 }
301 return (ARCHIVE_OK);
302 } else if (strcmp(key, "min-frame-out") == 0 ||
303 strcmp(key, "min-frame-size") == 0) {
304 if (string_to_size(value, &data->min_frame_out) != ARCHIVE_OK) {
305 return (ARCHIVE_WARN);
306 }
307 return (ARCHIVE_OK);
308 } else if (strcmp(key, "max-frame-in") == 0 ||
309 strcmp(key, "max-frame-size") == 0) {
310 if (string_to_size(value, &data->max_frame_in) != ARCHIVE_OK ||
311 data->max_frame_in < 1024) {
312 return (ARCHIVE_WARN);
313 }
314 return (ARCHIVE_OK);
315 } else if (strcmp(key, "max-frame-out") == 0) {
316 if (string_to_size(value, &data->max_frame_out) != ARCHIVE_OK ||
317 data->max_frame_out < 1024) {
318 return (ARCHIVE_WARN);
319 }
320 return (ARCHIVE_OK);
321 #endif
322 }
323 else if (strcmp(key, "long") == 0) {
324 intmax_t long_distance;
325 if (string_to_number(value, &long_distance) != ARCHIVE_OK) {
326 return (ARCHIVE_WARN);
327 }
328 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && ZSTD_VERSION_NUMBER >= MINVER_LONG
329 ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
330 if (ZSTD_isError(bounds.error)) {
331 int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
332 if (((int)long_distance) < 10 || (int)long_distance > max_distance)
333 return (ARCHIVE_WARN);
334 } else {
335 if ((int)long_distance < bounds.lowerBound || (int)long_distance > bounds.upperBound)
336 return (ARCHIVE_WARN);
337 }
338 #else
339 int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
340 if (((int)long_distance) < 10 || (int)long_distance > max_distance)
341 return (ARCHIVE_WARN);
342 #endif
343 data->long_distance = (int)long_distance;
344 return (ARCHIVE_OK);
345 }
346
347 /* Note: The "warn" return is just to inform the options
348 * supervisor that we didn't handle it. It will generate
349 * a suitable error if no one used this option. */
350 return (ARCHIVE_WARN);
351 }
352
353 #if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
354 /*
355 * Setup callback.
356 */
357 static int
archive_compressor_zstd_open(struct archive_write_filter * f)358 archive_compressor_zstd_open(struct archive_write_filter *f)
359 {
360 struct private_data *data = (struct private_data *)f->data;
361
362 if (data->out.dst == NULL) {
363 size_t bs = ZSTD_CStreamOutSize(), bpb;
364 if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
365 /* Buffer size should be a multiple number of
366 * the of bytes per block for performance. */
367 bpb = archive_write_get_bytes_per_block(f->archive);
368 if (bpb > bs)
369 bs = bpb;
370 else if (bpb != 0)
371 bs -= bs % bpb;
372 }
373 data->out.size = bs;
374 data->out.pos = 0;
375 data->out.dst = malloc(data->out.size);
376 if (data->out.dst == NULL) {
377 archive_set_error(f->archive, ENOMEM,
378 "Can't allocate data for compression buffer");
379 return (ARCHIVE_FATAL);
380 }
381 }
382
383 f->write = archive_compressor_zstd_write;
384
385 if (ZSTD_isError(ZSTD_initCStream(data->cstream,
386 data->compression_level))) {
387 archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
388 "Internal error initializing zstd compressor object");
389 return (ARCHIVE_FATAL);
390 }
391
392 ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_nbWorkers, data->threads);
393
394 #if ZSTD_VERSION_NUMBER >= MINVER_LONG
395 ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_windowLog, data->long_distance);
396 #endif
397
398 return (ARCHIVE_OK);
399 }
400
401 /*
402 * Write data to the compressed stream.
403 */
404 static int
archive_compressor_zstd_write(struct archive_write_filter * f,const void * buff,size_t length)405 archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
406 size_t length)
407 {
408 struct private_data *data = (struct private_data *)f->data;
409
410 return (drive_compressor(f, data, 0, buff, length));
411 }
412
413 /*
414 * Flush the compressed stream.
415 */
416 static int
archive_compressor_zstd_flush(struct archive_write_filter * f)417 archive_compressor_zstd_flush(struct archive_write_filter *f)
418 {
419 struct private_data *data = (struct private_data *)f->data;
420
421 if (data->frame_per_file && data->state == running) {
422 if (data->cur_frame_in > data->min_frame_in &&
423 data->cur_frame_out > data->min_frame_out) {
424 data->state = finishing;
425 }
426 }
427 return (drive_compressor(f, data, 1, NULL, 0));
428 }
429
430 /*
431 * Finish the compression...
432 */
433 static int
archive_compressor_zstd_close(struct archive_write_filter * f)434 archive_compressor_zstd_close(struct archive_write_filter *f)
435 {
436 struct private_data *data = (struct private_data *)f->data;
437
438 if (data->state == running)
439 data->state = finishing;
440 return (drive_compressor(f, data, 1, NULL, 0));
441 }
442
443 /*
444 * Utility function to push input data through compressor,
445 * writing full output blocks as necessary.
446 */
447 static int
drive_compressor(struct archive_write_filter * f,struct private_data * data,int flush,const void * src,size_t length)448 drive_compressor(struct archive_write_filter *f,
449 struct private_data *data, int flush, const void *src, size_t length)
450 {
451 ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 };
452 size_t ipos, opos, zstdret = 0;
453 int ret;
454
455 for (;;) {
456 ipos = in.pos;
457 opos = data->out.pos;
458 switch (data->state) {
459 case running:
460 if (in.pos == in.size)
461 return (ARCHIVE_OK);
462 zstdret = ZSTD_compressStream(data->cstream,
463 &data->out, &in);
464 if (ZSTD_isError(zstdret))
465 goto zstd_fatal;
466 break;
467 case finishing:
468 zstdret = ZSTD_endStream(data->cstream, &data->out);
469 if (ZSTD_isError(zstdret))
470 goto zstd_fatal;
471 if (zstdret == 0)
472 data->state = resetting;
473 break;
474 case resetting:
475 ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only);
476 data->cur_frame++;
477 data->cur_frame_in = 0;
478 data->cur_frame_out = 0;
479 data->state = running;
480 break;
481 }
482 data->total_in += in.pos - ipos;
483 data->cur_frame_in += in.pos - ipos;
484 data->cur_frame_out += data->out.pos - opos;
485 if (data->state == running) {
486 if (data->cur_frame_in >= data->max_frame_in ||
487 data->cur_frame_out >= data->max_frame_out) {
488 data->state = finishing;
489 }
490 }
491 if (data->out.pos == data->out.size ||
492 (flush && data->out.pos > 0)) {
493 ret = __archive_write_filter(f->next_filter,
494 data->out.dst, data->out.pos);
495 if (ret != ARCHIVE_OK)
496 goto fatal;
497 data->out.pos = 0;
498 }
499 }
500 zstd_fatal:
501 archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
502 "Zstd compression failed: %s",
503 ZSTD_getErrorName(zstdret));
504 fatal:
505 return (ARCHIVE_FATAL);
506 }
507
508 #else /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
509
510 static int
archive_compressor_zstd_open(struct archive_write_filter * f)511 archive_compressor_zstd_open(struct archive_write_filter *f)
512 {
513 struct private_data *data = (struct private_data *)f->data;
514 struct archive_string as;
515 int r;
516
517 archive_string_init(&as);
518 /* --no-check matches library default */
519 archive_strcpy(&as, "zstd --no-check");
520
521 if (data->compression_level < CLEVEL_STD_MIN) {
522 archive_string_sprintf(&as, " --fast=%d", -data->compression_level);
523 } else {
524 archive_string_sprintf(&as, " -%d", data->compression_level);
525 }
526
527 if (data->compression_level > CLEVEL_STD_MAX) {
528 archive_strcat(&as, " --ultra");
529 }
530
531 if (data->threads != 0) {
532 archive_string_sprintf(&as, " --threads=%d", data->threads);
533 }
534
535 if (data->long_distance != 0) {
536 archive_string_sprintf(&as, " --long=%d", data->long_distance);
537 }
538
539 f->write = archive_compressor_zstd_write;
540 r = __archive_write_program_open(f, data->pdata, as.s);
541 archive_string_free(&as);
542 return (r);
543 }
544
545 static int
archive_compressor_zstd_write(struct archive_write_filter * f,const void * buff,size_t length)546 archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
547 size_t length)
548 {
549 struct private_data *data = (struct private_data *)f->data;
550
551 return __archive_write_program_write(f, data->pdata, buff, length);
552 }
553
/*
 * Flush callback for the external-program build: does nothing and
 * always reports success.
 */
static int
archive_compressor_zstd_flush(struct archive_write_filter *f)
{
	(void)f; /* UNUSED */

	return (ARCHIVE_OK);
}
561
562 static int
archive_compressor_zstd_close(struct archive_write_filter * f)563 archive_compressor_zstd_close(struct archive_write_filter *f)
564 {
565 struct private_data *data = (struct private_data *)f->data;
566
567 return __archive_write_program_close(f, data->pdata);
568 }
569
570 #endif /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
571