1 /* 2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /* ************************************* 13 * Compiler Options 14 ***************************************/ 15 #ifdef _MSC_VER /* Visual */ 16 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 17 # pragma warning(disable : 4204) /* non-constant aggregate initializer */ 18 #endif 19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE) 20 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ 21 #endif 22 23 /*-************************************* 24 * Includes 25 ***************************************/ 26 #include "platform.h" /* Large Files support, SET_BINARY_MODE */ 27 #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ 28 #include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ 29 #include <stdlib.h> /* malloc, free */ 30 #include <string.h> /* strcmp, strlen */ 31 #include <assert.h> 32 #include <errno.h> /* errno */ 33 #include <limits.h> /* INT_MAX */ 34 #include <signal.h> 35 #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ 36 37 #if defined (_MSC_VER) 38 # include <sys/stat.h> 39 # include <io.h> 40 #endif 41 42 #include "../lib/common/mem.h" /* U32, U64 */ 43 #include "fileio.h" 44 45 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ 46 #include "../lib/zstd.h" 47 #include "../lib/common/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ 48 #include "../lib/compress/zstd_compress_internal.h" 49 50 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) 51 # include <zlib.h> 52 # if !defined(z_const) 53 # 
define z_const 54 # endif 55 #endif 56 57 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) 58 # include <lzma.h> 59 #endif 60 61 #define LZ4_MAGICNUMBER 0x184D2204 62 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) 63 # define LZ4F_ENABLE_OBSOLETE_ENUMS 64 # include <lz4frame.h> 65 # include <lz4.h> 66 #endif 67 68 69 /*-************************************* 70 * Constants 71 ***************************************/ 72 #define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */ 73 #define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ 74 75 #define FNSPACE 30 76 77 /*-************************************* 78 * Macros 79 ***************************************/ 80 81 struct FIO_display_prefs_s { 82 int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ 83 U32 noProgress; 84 }; 85 86 static FIO_display_prefs_t g_display_prefs = {2, 0}; 87 88 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 89 #define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) 90 #define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } } 91 92 static const U64 g_refreshRate = SEC_TO_MICRO / 6; 93 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 94 95 #define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) 96 #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } 97 #define DISPLAYUPDATE(l, ...) { \ 98 if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \ 99 if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ 100 DELAY_NEXT_UPDATE(); \ 101 DISPLAY(__VA_ARGS__); \ 102 if (g_display_prefs.displayLevel>=4) fflush(stderr); \ 103 } } } 104 105 #undef MIN /* in case it would be already defined */ 106 #define MIN(a,b) ((a) < (b) ? (a) : (b)) 107 108 109 #define EXM_THROW(error, ...) 
\ 110 { \ 111 DISPLAYLEVEL(1, "zstd: "); \ 112 DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ 113 DISPLAYLEVEL(1, "error %i : ", error); \ 114 DISPLAYLEVEL(1, __VA_ARGS__); \ 115 DISPLAYLEVEL(1, " \n"); \ 116 exit(error); \ 117 } 118 119 #define CHECK_V(v, f) \ 120 v = f; \ 121 if (ZSTD_isError(v)) { \ 122 DISPLAYLEVEL(5, "%s \n", #f); \ 123 EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ 124 } 125 #define CHECK(f) { size_t err; CHECK_V(err, f); } 126 127 128 /*-************************************ 129 * Signal (Ctrl-C trapping) 130 **************************************/ 131 static const char* g_artefact = NULL; 132 static void INThandler(int sig) 133 { 134 assert(sig==SIGINT); (void)sig; 135 #if !defined(_MSC_VER) 136 signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ 137 #endif 138 if (g_artefact) { 139 assert(UTIL_isRegularFile(g_artefact)); 140 remove(g_artefact); 141 } 142 DISPLAY("\n"); 143 exit(2); 144 } 145 static void addHandler(char const* dstFileName) 146 { 147 if (UTIL_isRegularFile(dstFileName)) { 148 g_artefact = dstFileName; 149 signal(SIGINT, INThandler); 150 } else { 151 g_artefact = NULL; 152 } 153 } 154 /* Idempotent */ 155 static void clearHandler(void) 156 { 157 if (g_artefact) signal(SIGINT, SIG_DFL); 158 g_artefact = NULL; 159 } 160 161 162 /*-********************************************************* 163 * Termination signal trapping (Print debug stack trace) 164 ***********************************************************/ 165 #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */ 166 # if (__has_feature(address_sanitizer)) 167 # define BACKTRACE_ENABLE 0 168 # endif /* __has_feature(address_sanitizer) */ 169 #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */ 170 # define BACKTRACE_ENABLE 0 171 #endif 172 173 #if !defined(BACKTRACE_ENABLE) 174 /* automatic detector : backtrace enabled by default on linux+glibc and osx */ 175 
# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \ 176 || (defined(__APPLE__) && defined(__MACH__)) 177 # define BACKTRACE_ENABLE 1 178 # else 179 # define BACKTRACE_ENABLE 0 180 # endif 181 #endif 182 183 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */ 184 185 186 #if BACKTRACE_ENABLE 187 188 #include <execinfo.h> /* backtrace, backtrace_symbols */ 189 190 #define MAX_STACK_FRAMES 50 191 192 static void ABRThandler(int sig) { 193 const char* name; 194 void* addrlist[MAX_STACK_FRAMES]; 195 char** symbollist; 196 int addrlen, i; 197 198 switch (sig) { 199 case SIGABRT: name = "SIGABRT"; break; 200 case SIGFPE: name = "SIGFPE"; break; 201 case SIGILL: name = "SIGILL"; break; 202 case SIGINT: name = "SIGINT"; break; 203 case SIGSEGV: name = "SIGSEGV"; break; 204 default: name = "UNKNOWN"; 205 } 206 207 DISPLAY("Caught %s signal, printing stack:\n", name); 208 /* Retrieve current stack addresses. */ 209 addrlen = backtrace(addrlist, MAX_STACK_FRAMES); 210 if (addrlen == 0) { 211 DISPLAY("\n"); 212 return; 213 } 214 /* Create readable strings to each frame. */ 215 symbollist = backtrace_symbols(addrlist, addrlen); 216 /* Print the stack trace, excluding calls handling the signal. */ 217 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) { 218 DISPLAY("%s\n", symbollist[i]); 219 } 220 free(symbollist); 221 /* Reset and raise the signal so default handler runs. 
*/ 222 signal(sig, SIG_DFL); 223 raise(sig); 224 } 225 #endif 226 227 void FIO_addAbortHandler() 228 { 229 #if BACKTRACE_ENABLE 230 signal(SIGABRT, ABRThandler); 231 signal(SIGFPE, ABRThandler); 232 signal(SIGILL, ABRThandler); 233 signal(SIGSEGV, ABRThandler); 234 signal(SIGBUS, ABRThandler); 235 #endif 236 } 237 238 239 /*-************************************************************ 240 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW 241 ***************************************************************/ 242 #if defined(_MSC_VER) && _MSC_VER >= 1400 243 # define LONG_SEEK _fseeki64 244 # define LONG_TELL _ftelli64 245 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ 246 # define LONG_SEEK fseeko 247 # define LONG_TELL ftello 248 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) 249 # define LONG_SEEK fseeko64 250 # define LONG_TELL ftello64 251 #elif defined(_WIN32) && !defined(__DJGPP__) 252 # include <windows.h> 253 static int LONG_SEEK(FILE* file, __int64 offset, int origin) { 254 LARGE_INTEGER off; 255 DWORD method; 256 off.QuadPart = offset; 257 if (origin == SEEK_END) 258 method = FILE_END; 259 else if (origin == SEEK_CUR) 260 method = FILE_CURRENT; 261 else 262 method = FILE_BEGIN; 263 264 if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) 265 return 0; 266 else 267 return -1; 268 } 269 static __int64 LONG_TELL(FILE* file) { 270 LARGE_INTEGER off, newOff; 271 off.QuadPart = 0; 272 newOff.QuadPart = 0; 273 SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT); 274 return newOff.QuadPart; 275 } 276 #else 277 # define LONG_SEEK fseek 278 # define LONG_TELL ftell 279 #endif 280 281 282 /*-************************************* 283 * Parameters: FIO_prefs_t 284 ***************************************/ 285 286 /* typedef'd to FIO_prefs_t within fileio.h */ 287 struct 
FIO_prefs_s { 288 289 /* Algorithm preferences */ 290 FIO_compressionType_t compressionType; 291 U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ 292 int dictIDFlag; 293 int checksumFlag; 294 int blockSize; 295 int overlapLog; 296 U32 adaptiveMode; 297 int rsyncable; 298 int minAdaptLevel; 299 int maxAdaptLevel; 300 int ldmFlag; 301 int ldmHashLog; 302 int ldmMinMatch; 303 int ldmBucketSizeLog; 304 int ldmHashRateLog; 305 size_t streamSrcSize; 306 size_t targetCBlockSize; 307 int srcSizeHint; 308 int testMode; 309 ZSTD_literalCompressionMode_e literalCompressionMode; 310 311 /* IO preferences */ 312 U32 removeSrcFile; 313 U32 overwrite; 314 315 /* Computation resources preferences */ 316 unsigned memLimit; 317 int nbWorkers; 318 319 int excludeCompressedFiles; 320 int patchFromMode; 321 int contentSize; 322 }; 323 324 /*-************************************* 325 * Parameters: FIO_ctx_t 326 ***************************************/ 327 328 /* typedef'd to FIO_ctx_t within fileio.h */ 329 struct FIO_ctx_s { 330 331 /* file i/o info */ 332 int nbFilesTotal; 333 int hasStdinInput; 334 int hasStdoutOutput; 335 336 /* file i/o state */ 337 int currFileIdx; 338 int nbFilesProcessed; 339 size_t totalBytesInput; 340 size_t totalBytesOutput; 341 }; 342 343 344 /*-************************************* 345 * Parameters: Initialization 346 ***************************************/ 347 348 #define FIO_OVERLAP_LOG_NOTSET 9999 349 #define FIO_LDM_PARAM_NOTSET 9999 350 351 352 FIO_prefs_t* FIO_createPreferences(void) 353 { 354 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t)); 355 if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); 356 357 ret->compressionType = FIO_zstdCompression; 358 ret->overwrite = 0; 359 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 360 ret->dictIDFlag = 1; 361 ret->checksumFlag = 1; 362 ret->removeSrcFile = 0; 363 ret->memLimit = 0; 364 ret->nbWorkers = 1; 365 ret->blockSize = 0; 
366 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET; 367 ret->adaptiveMode = 0; 368 ret->rsyncable = 0; 369 ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ 370 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */ 371 ret->ldmFlag = 0; 372 ret->ldmHashLog = 0; 373 ret->ldmMinMatch = 0; 374 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; 375 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; 376 ret->streamSrcSize = 0; 377 ret->targetCBlockSize = 0; 378 ret->srcSizeHint = 0; 379 ret->testMode = 0; 380 ret->literalCompressionMode = ZSTD_lcm_auto; 381 ret->excludeCompressedFiles = 0; 382 return ret; 383 } 384 385 FIO_ctx_t* FIO_createContext(void) 386 { 387 FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t)); 388 if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); 389 390 ret->currFileIdx = 0; 391 ret->hasStdinInput = 0; 392 ret->hasStdoutOutput = 0; 393 ret->nbFilesTotal = 1; 394 ret->nbFilesProcessed = 0; 395 ret->totalBytesInput = 0; 396 ret->totalBytesOutput = 0; 397 return ret; 398 } 399 400 void FIO_freePreferences(FIO_prefs_t* const prefs) 401 { 402 free(prefs); 403 } 404 405 void FIO_freeContext(FIO_ctx_t* const fCtx) 406 { 407 free(fCtx); 408 } 409 410 411 /*-************************************* 412 * Parameters: Display Options 413 ***************************************/ 414 415 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; } 416 417 void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; } 418 419 420 /*-************************************* 421 * Parameters: Setters 422 ***************************************/ 423 424 /* FIO_prefs_t functions */ 425 426 void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } 427 428 void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } 
429 430 void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; } 431 432 void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } 433 434 void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } 435 436 void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); } 437 438 void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } 439 440 void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { 441 #ifndef ZSTD_MULTITHREAD 442 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); 443 #endif 444 prefs->nbWorkers = nbWorkers; 445 } 446 447 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } 448 449 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { 450 if (blockSize && prefs->nbWorkers==0) 451 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); 452 prefs->blockSize = blockSize; 453 } 454 455 void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ 456 if (overlapLog && prefs->nbWorkers==0) 457 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); 458 prefs->overlapLog = overlapLog; 459 } 460 461 void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) { 462 if ((adapt>0) && (prefs->nbWorkers==0)) 463 EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); 464 prefs->adaptiveMode = adapt; 465 } 466 467 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { 468 if ((rsyncable>0) && (prefs->nbWorkers==0)) 469 EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); 470 prefs->rsyncable = rsyncable; 471 } 472 473 void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { 474 
prefs->streamSrcSize = streamSrcSize; 475 } 476 477 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { 478 prefs->targetCBlockSize = targetCBlockSize; 479 } 480 481 void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) { 482 prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint); 483 } 484 485 void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) { 486 prefs->testMode = (testMode!=0); 487 } 488 489 void FIO_setLiteralCompressionMode( 490 FIO_prefs_t* const prefs, 491 ZSTD_literalCompressionMode_e mode) { 492 prefs->literalCompressionMode = mode; 493 } 494 495 void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) 496 { 497 #ifndef ZSTD_NOCOMPRESS 498 assert(minCLevel >= ZSTD_minCLevel()); 499 #endif 500 prefs->minAdaptLevel = minCLevel; 501 } 502 503 void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) 504 { 505 prefs->maxAdaptLevel = maxCLevel; 506 } 507 508 void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) { 509 prefs->ldmFlag = (ldmFlag>0); 510 } 511 512 void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) { 513 prefs->ldmHashLog = ldmHashLog; 514 } 515 516 void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) { 517 prefs->ldmMinMatch = ldmMinMatch; 518 } 519 520 void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) { 521 prefs->ldmBucketSizeLog = ldmBucketSizeLog; 522 } 523 524 525 void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) { 526 prefs->ldmHashRateLog = ldmHashRateLog; 527 } 528 529 void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value) 530 { 531 prefs->patchFromMode = value != 0; 532 } 533 534 void FIO_setContentSize(FIO_prefs_t* const prefs, int value) 535 { 536 prefs->contentSize = value != 0; 537 } 538 539 /* FIO_ctx_t functions */ 540 541 void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { 542 fCtx->hasStdoutOutput = value; 543 } 544 545 void FIO_setNbFilesTotal(FIO_ctx_t* 
const fCtx, int value) 546 { 547 fCtx->nbFilesTotal = value; 548 } 549 550 void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) { 551 size_t i = 0; 552 for ( ; i < filenames->tableSize; ++i) { 553 if (!strcmp(stdinmark, filenames->fileNames[i])) { 554 fCtx->hasStdinInput = 1; 555 return; 556 } 557 } 558 } 559 560 /*-************************************* 561 * Functions 562 ***************************************/ 563 /** FIO_removeFile() : 564 * @result : Unlink `fileName`, even if it's read-only */ 565 static int FIO_removeFile(const char* path) 566 { 567 stat_t statbuf; 568 if (!UTIL_stat(path, &statbuf)) { 569 DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path); 570 return 0; 571 } 572 if (!UTIL_isRegularFileStat(&statbuf)) { 573 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); 574 return 0; 575 } 576 #if defined(_WIN32) || defined(WIN32) 577 /* windows doesn't allow remove read-only files, 578 * so try to make it writable first */ 579 if (!(statbuf.st_mode & _S_IWRITE)) { 580 UTIL_chmod(path, &statbuf, _S_IWRITE); 581 } 582 #endif 583 return remove(path); 584 } 585 586 /** FIO_openSrcFile() : 587 * condition : `srcFileName` must be non-NULL. 
588 * @result : FILE* to `srcFileName`, or NULL if it fails */ 589 static FILE* FIO_openSrcFile(const char* srcFileName) 590 { 591 stat_t statbuf; 592 assert(srcFileName != NULL); 593 if (!strcmp (srcFileName, stdinmark)) { 594 DISPLAYLEVEL(4,"Using stdin for input \n"); 595 SET_BINARY_MODE(stdin); 596 return stdin; 597 } 598 599 if (!UTIL_stat(srcFileName, &statbuf)) { 600 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", 601 srcFileName, strerror(errno)); 602 return NULL; 603 } 604 605 if (!UTIL_isRegularFileStat(&statbuf) 606 && !UTIL_isFIFOStat(&statbuf) 607 ) { 608 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", 609 srcFileName); 610 return NULL; 611 } 612 613 { FILE* const f = fopen(srcFileName, "rb"); 614 if (f == NULL) 615 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 616 return f; 617 } 618 } 619 620 /** FIO_openDstFile() : 621 * condition : `dstFileName` must be non-NULL. 622 * @result : FILE* to `dstFileName`, or NULL if it fails */ 623 static FILE* 624 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, 625 const char* srcFileName, const char* dstFileName) 626 { 627 if (prefs->testMode) return NULL; /* do not open file in test mode */ 628 629 assert(dstFileName != NULL); 630 if (!strcmp (dstFileName, stdoutmark)) { 631 DISPLAYLEVEL(4,"Using stdout for output \n"); 632 SET_BINARY_MODE(stdout); 633 if (prefs->sparseFileSupport == 1) { 634 prefs->sparseFileSupport = 0; 635 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); 636 } 637 return stdout; 638 } 639 640 /* ensure dst is not the same as src */ 641 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { 642 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); 643 return NULL; 644 } 645 646 if (prefs->sparseFileSupport == 1) { 647 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 648 } 649 650 if (UTIL_isRegularFile(dstFileName)) { 651 /* Check if 
destination file already exists */ 652 FILE* const fCheck = fopen( dstFileName, "rb" ); 653 #if !defined(_WIN32) 654 /* this test does not work on Windows : 655 * `NUL` and `nul` are detected as regular files */ 656 if (!strcmp(dstFileName, nulmark)) { 657 EXM_THROW(40, "%s is unexpectedly categorized as a regular file", 658 dstFileName); 659 } 660 #endif 661 if (fCheck != NULL) { /* dst file exists, authorization prompt */ 662 fclose(fCheck); 663 if (!prefs->overwrite) { 664 if (g_display_prefs.displayLevel <= 1) { 665 /* No interaction possible */ 666 DISPLAY("zstd: %s already exists; not overwritten \n", 667 dstFileName); 668 return NULL; 669 } 670 DISPLAY("zstd: %s already exists; ", dstFileName); 671 if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput)) 672 return NULL; 673 } 674 /* need to unlink */ 675 FIO_removeFile(dstFileName); 676 } } 677 678 { FILE* const f = fopen( dstFileName, "wb" ); 679 if (f == NULL) { 680 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); 681 } else if (srcFileName != NULL 682 && strcmp (srcFileName, stdinmark) 683 && strcmp(dstFileName, nulmark) ) { 684 /* reduce rights on newly created dst file while compression is ongoing */ 685 UTIL_chmod(dstFileName, NULL, 00600); 686 } 687 return f; 688 } 689 } 690 691 /*! FIO_createDictBuffer() : 692 * creates a buffer, pointed by `*bufferPtr`, 693 * loads `filename` content into it, up to DICTSIZE_MAX bytes. 
694 * @return : loaded size 695 * if fileName==NULL, returns 0 and a NULL pointer 696 */ 697 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs) 698 { 699 FILE* fileHandle; 700 U64 fileSize; 701 702 assert(bufferPtr != NULL); 703 *bufferPtr = NULL; 704 if (fileName == NULL) return 0; 705 706 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 707 fileHandle = fopen(fileName, "rb"); 708 if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno)); 709 710 fileSize = UTIL_getFileSize(fileName); 711 { 712 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX; 713 if (fileSize > dictSizeMax) { 714 EXM_THROW(32, "Dictionary file %s is too large (> %u bytes)", 715 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ 716 } 717 } 718 *bufferPtr = malloc((size_t)fileSize); 719 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); 720 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); 721 if (readSize != fileSize) 722 EXM_THROW(35, "Error reading dictionary file %s : %s", 723 fileName, strerror(errno)); 724 } 725 fclose(fileHandle); 726 return (size_t)fileSize; 727 } 728 729 730 731 /* FIO_checkFilenameCollisions() : 732 * Checks for and warns if there are any files that would have the same output path 733 */ 734 int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { 735 const char **filenameTableSorted, *prevElem, *filename; 736 unsigned u; 737 738 filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); 739 if (!filenameTableSorted) { 740 DISPLAY("Unable to malloc new str array, not checking for name collisions\n"); 741 return 1; 742 } 743 744 for (u = 0; u < nbFiles; ++u) { 745 filename = strrchr(filenameTable[u], PATH_SEP); 746 if (filename == NULL) { 747 filenameTableSorted[u] = filenameTable[u]; 748 } else { 749 filenameTableSorted[u] = filename+1; 750 } 751 } 752 753 qsort((void*)filenameTableSorted, 
nbFiles, sizeof(char*), UTIL_compareStr); 754 prevElem = filenameTableSorted[0]; 755 for (u = 1; u < nbFiles; ++u) { 756 if (strcmp(prevElem, filenameTableSorted[u]) == 0) { 757 DISPLAY("WARNING: Two files have same filename: %s\n", prevElem); 758 } 759 prevElem = filenameTableSorted[u]; 760 } 761 762 free((void*)filenameTableSorted); 763 return 0; 764 } 765 766 static const char* 767 extractFilename(const char* path, char separator) 768 { 769 const char* search = strrchr(path, separator); 770 if (search == NULL) return path; 771 return search+1; 772 } 773 774 /* FIO_createFilename_fromOutDir() : 775 * Takes a source file name and specified output directory, and 776 * allocates memory for and returns a pointer to final path. 777 * This function never returns an error (it may abort() in case of pb) 778 */ 779 static char* 780 FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen) 781 { 782 const char* filenameStart; 783 char separator; 784 char* result; 785 786 #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ 787 separator = '\\'; 788 #else 789 separator = '/'; 790 #endif 791 792 filenameStart = extractFilename(path, separator); 793 #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ 794 filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */ 795 #endif 796 797 result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1); 798 if (!result) { 799 EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno)); 800 } 801 802 memcpy(result, outDirName, strlen(outDirName)); 803 if (outDirName[strlen(outDirName)-1] == separator) { 804 memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart)); 805 } else { 806 memcpy(result + strlen(outDirName), &separator, 1); 807 memcpy(result + strlen(outDirName) + 1, filenameStart, 
strlen(filenameStart)); 808 } 809 810 return result; 811 } 812 813 /* FIO_highbit64() : 814 * gives position of highest bit. 815 * note : only works for v > 0 ! 816 */ 817 static unsigned FIO_highbit64(unsigned long long v) 818 { 819 unsigned count = 0; 820 assert(v != 0); 821 v >>= 1; 822 while (v) { v >>= 1; count++; } 823 return count; 824 } 825 826 static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, 827 unsigned long long const dictSize, 828 unsigned long long const maxSrcFileSize) 829 { 830 unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize)); 831 unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX); 832 if (maxSize == UTIL_FILESIZE_UNKNOWN) 833 EXM_THROW(42, "Using --patch-from with stdin requires --stream-size"); 834 assert(maxSize != UTIL_FILESIZE_UNKNOWN); 835 if (maxSize > maxWindowSize) 836 EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB)); 837 FIO_setMemLimit(prefs, (unsigned)maxSize); 838 } 839 840 /* FIO_removeMultiFilesWarning() : 841 * Returns 1 if the console should abort, 0 if console should proceed. 842 * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts. 843 * 844 * If -f is specified, or there is just 1 file, zstd will always proceed as usual. 845 * If --rm is specified, there will be a prompt asking for user confirmation. 846 * If -f is specified with --rm, zstd will proceed as usual 847 * If -q is specified with --rm, zstd will abort pre-emptively 848 * If neither flag is specified, zstd will prompt the user for confirmation to proceed. 849 * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q). 850 * However, if the output is stdout, we will always abort rather than displaying the warning prompt. 
851 */ 852 static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff) 853 { 854 int error = 0; 855 if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) { 856 if (g_display_prefs.displayLevel <= displayLevelCutoff) { 857 if (prefs->removeSrcFile) { 858 DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s", outFileName); 859 error = 1; 860 } 861 } else { 862 if (!strcmp(outFileName, stdoutmark)) { 863 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. "); 864 } else { 865 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s ", outFileName); 866 } 867 DISPLAYLEVEL(2, "\nThe concatenated output CANNOT regenerate the original directory tree. ") 868 if (prefs->removeSrcFile) { 869 if (fCtx->hasStdoutOutput) { 870 DISPLAYLEVEL(1, "\nAborting. Use -f if you really want to delete the files and output to stdout"); 871 error = 1; 872 } else { 873 error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? 
(y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); 874 } 875 } 876 } 877 DISPLAY("\n"); 878 } 879 return error; 880 } 881 882 #ifndef ZSTD_NOCOMPRESS 883 884 /* ********************************************************************** 885 * Compression 886 ************************************************************************/ 887 typedef struct { 888 FILE* srcFile; 889 FILE* dstFile; 890 void* srcBuffer; 891 size_t srcBufferSize; 892 void* dstBuffer; 893 size_t dstBufferSize; 894 void* dictBuffer; 895 size_t dictBufferSize; 896 const char* dictFileName; 897 ZSTD_CStream* cctx; 898 } cRess_t; 899 900 static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, 901 ZSTD_compressionParameters* comprParams, 902 unsigned long long const dictSize, 903 unsigned long long const maxSrcFileSize, 904 int cLevel) 905 { 906 unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1; 907 ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize); 908 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize); 909 if (fileWindowLog > ZSTD_WINDOWLOG_MAX) 910 DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n"); 911 comprParams->windowLog = MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog); 912 if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) { 913 if (!prefs->ldmFlag) 914 DISPLAYLEVEL(1, "long mode automatically triggered\n"); 915 FIO_setLdmFlag(prefs, 1); 916 } 917 if (cParams.strategy >= ZSTD_btopt) { 918 DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); 919 DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n"); 920 DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n"); 921 DISPLAYLEVEL(1, "- Set a larger chainLog (eg. 
--zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); 922 DISPLAYLEVEL(1, "Also consdier playing around with searchLog and hashLog\n"); 923 } 924 } 925 926 static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, 927 const char* dictFileName, unsigned long long const maxSrcFileSize, 928 int cLevel, ZSTD_compressionParameters comprParams) { 929 cRess_t ress; 930 memset(&ress, 0, sizeof(ress)); 931 932 DISPLAYLEVEL(6, "FIO_createCResources \n"); 933 ress.cctx = ZSTD_createCCtx(); 934 if (ress.cctx == NULL) 935 EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", 936 strerror(errno)); 937 ress.srcBufferSize = ZSTD_CStreamInSize(); 938 ress.srcBuffer = malloc(ress.srcBufferSize); 939 ress.dstBufferSize = ZSTD_CStreamOutSize(); 940 941 /* need to update memLimit before calling createDictBuffer 942 * because of memLimit check inside it */ 943 if (prefs->patchFromMode) { 944 unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; 945 FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? 
ssSize : maxSrcFileSize, cLevel); 946 } 947 ress.dstBuffer = malloc(ress.dstBufferSize); 948 ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */ 949 if (!ress.srcBuffer || !ress.dstBuffer) 950 EXM_THROW(31, "allocation error : not enough memory"); 951 952 /* Advanced parameters, including dictionary */ 953 if (dictFileName && (ress.dictBuffer==NULL)) 954 EXM_THROW(32, "allocation error : can't create dictBuffer"); 955 ress.dictFileName = dictFileName; 956 957 if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog) 958 comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT; 959 960 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */ 961 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) ); 962 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) ); 963 /* compression level */ 964 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); 965 /* max compressed block size */ 966 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) ); 967 /* source size hint */ 968 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) ); 969 /* long distance matching */ 970 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); 971 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); 972 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) ); 973 if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { 974 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) ); 975 } 976 if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) { 977 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) ); 978 } 
979 /* compression parameters */ 980 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); 981 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) ); 982 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) ); 983 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); 984 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); 985 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); 986 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); 987 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); 988 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); 989 /* multi-threading */ 990 #ifdef ZSTD_MULTITHREAD 991 DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); 992 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) ); 993 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) ); 994 if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) { 995 DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog); 996 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) ); 997 } 998 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); 999 #endif 1000 /* dictionary */ 1001 if (prefs->patchFromMode) { 1002 CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) ); 1003 } else { 1004 CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) ); 1005 } 1006 1007 return ress; 1008 } 1009 1010 static void FIO_freeCResources(const cRess_t* const ress) 1011 { 1012 free(ress->srcBuffer); 1013 free(ress->dstBuffer); 1014 free(ress->dictBuffer); 1015 ZSTD_freeCStream(ress->cctx); /* never fails */ 1016 } 1017 1018 1019 #ifdef 
ZSTD_GZCOMPRESS 1020 static unsigned long long 1021 FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */ 1022 const char* srcFileName, U64 const srcFileSize, 1023 int compressionLevel, U64* readsize) 1024 { 1025 unsigned long long inFileSize = 0, outFileSize = 0; 1026 z_stream strm; 1027 1028 if (compressionLevel > Z_BEST_COMPRESSION) 1029 compressionLevel = Z_BEST_COMPRESSION; 1030 1031 strm.zalloc = Z_NULL; 1032 strm.zfree = Z_NULL; 1033 strm.opaque = Z_NULL; 1034 1035 { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, 1036 15 /* maxWindowLogSize */ + 16 /* gzip only */, 1037 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ 1038 if (ret != Z_OK) { 1039 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); 1040 } } 1041 1042 strm.next_in = 0; 1043 strm.avail_in = 0; 1044 strm.next_out = (Bytef*)ress->dstBuffer; 1045 strm.avail_out = (uInt)ress->dstBufferSize; 1046 1047 while (1) { 1048 int ret; 1049 if (strm.avail_in == 0) { 1050 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 1051 if (inSize == 0) break; 1052 inFileSize += inSize; 1053 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 1054 strm.avail_in = (uInt)inSize; 1055 } 1056 ret = deflate(&strm, Z_NO_FLUSH); 1057 if (ret != Z_OK) 1058 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); 1059 { size_t const cSize = ress->dstBufferSize - strm.avail_out; 1060 if (cSize) { 1061 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) 1062 EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno)); 1063 outFileSize += cSize; 1064 strm.next_out = (Bytef*)ress->dstBuffer; 1065 strm.avail_out = (uInt)ress->dstBufferSize; 1066 } } 1067 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { 1068 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", 1069 (unsigned)(inFileSize>>20), 1070 (double)outFileSize/inFileSize*100) 1071 } else { 1072 DISPLAYUPDATE(2, 
"\rRead : %u / %u MB ==> %.2f%% ", 1073 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 1074 (double)outFileSize/inFileSize*100); 1075 } } 1076 1077 while (1) { 1078 int const ret = deflate(&strm, Z_FINISH); 1079 { size_t const cSize = ress->dstBufferSize - strm.avail_out; 1080 if (cSize) { 1081 if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) 1082 EXM_THROW(75, "Write error : %s ", strerror(errno)); 1083 outFileSize += cSize; 1084 strm.next_out = (Bytef*)ress->dstBuffer; 1085 strm.avail_out = (uInt)ress->dstBufferSize; 1086 } } 1087 if (ret == Z_STREAM_END) break; 1088 if (ret != Z_BUF_ERROR) 1089 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); 1090 } 1091 1092 { int const ret = deflateEnd(&strm); 1093 if (ret != Z_OK) { 1094 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); 1095 } } 1096 *readsize = inFileSize; 1097 return outFileSize; 1098 } 1099 #endif 1100 1101 1102 #ifdef ZSTD_LZMACOMPRESS 1103 static unsigned long long 1104 FIO_compressLzmaFrame(cRess_t* ress, 1105 const char* srcFileName, U64 const srcFileSize, 1106 int compressionLevel, U64* readsize, int plain_lzma) 1107 { 1108 unsigned long long inFileSize = 0, outFileSize = 0; 1109 lzma_stream strm = LZMA_STREAM_INIT; 1110 lzma_action action = LZMA_RUN; 1111 lzma_ret ret; 1112 1113 if (compressionLevel < 0) compressionLevel = 0; 1114 if (compressionLevel > 9) compressionLevel = 9; 1115 1116 if (plain_lzma) { 1117 lzma_options_lzma opt_lzma; 1118 if (lzma_lzma_preset(&opt_lzma, compressionLevel)) 1119 EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName); 1120 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ 1121 if (ret != LZMA_OK) 1122 EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); 1123 } else { 1124 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ 1125 if (ret != LZMA_OK) 1126 EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); 1127 } 1128 1129 
strm.next_in = 0; 1130 strm.avail_in = 0; 1131 strm.next_out = (BYTE*)ress->dstBuffer; 1132 strm.avail_out = ress->dstBufferSize; 1133 1134 while (1) { 1135 if (strm.avail_in == 0) { 1136 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 1137 if (inSize == 0) action = LZMA_FINISH; 1138 inFileSize += inSize; 1139 strm.next_in = (BYTE const*)ress->srcBuffer; 1140 strm.avail_in = inSize; 1141 } 1142 1143 ret = lzma_code(&strm, action); 1144 1145 if (ret != LZMA_OK && ret != LZMA_STREAM_END) 1146 EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); 1147 { size_t const compBytes = ress->dstBufferSize - strm.avail_out; 1148 if (compBytes) { 1149 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) 1150 EXM_THROW(85, "Write error : %s", strerror(errno)); 1151 outFileSize += compBytes; 1152 strm.next_out = (BYTE*)ress->dstBuffer; 1153 strm.avail_out = ress->dstBufferSize; 1154 } } 1155 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 1156 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 1157 (unsigned)(inFileSize>>20), 1158 (double)outFileSize/inFileSize*100) 1159 else 1160 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 1161 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 1162 (double)outFileSize/inFileSize*100); 1163 if (ret == LZMA_STREAM_END) break; 1164 } 1165 1166 lzma_end(&strm); 1167 *readsize = inFileSize; 1168 1169 return outFileSize; 1170 } 1171 #endif 1172 1173 #ifdef ZSTD_LZ4COMPRESS 1174 1175 #if LZ4_VERSION_NUMBER <= 10600 1176 #define LZ4F_blockLinked blockLinked 1177 #define LZ4F_max64KB max64KB 1178 #endif 1179 1180 static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } 1181 1182 static unsigned long long 1183 FIO_compressLz4Frame(cRess_t* ress, 1184 const char* srcFileName, U64 const srcFileSize, 1185 int compressionLevel, int checksumFlag, 1186 U64* readsize) 1187 { 1188 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB); 1189 
unsigned long long inFileSize = 0, outFileSize = 0; 1190 1191 LZ4F_preferences_t prefs; 1192 LZ4F_compressionContext_t ctx; 1193 1194 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); 1195 if (LZ4F_isError(errorCode)) 1196 EXM_THROW(31, "zstd: failed to create lz4 compression context"); 1197 1198 memset(&prefs, 0, sizeof(prefs)); 1199 1200 assert(blockSize <= ress->srcBufferSize); 1201 1202 prefs.autoFlush = 1; 1203 prefs.compressionLevel = compressionLevel; 1204 prefs.frameInfo.blockMode = LZ4F_blockLinked; 1205 prefs.frameInfo.blockSizeID = LZ4F_max64KB; 1206 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag; 1207 #if LZ4_VERSION_NUMBER >= 10600 1208 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; 1209 #endif 1210 assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); 1211 1212 { 1213 size_t readSize; 1214 size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); 1215 if (LZ4F_isError(headerSize)) 1216 EXM_THROW(33, "File header generation failed : %s", 1217 LZ4F_getErrorName(headerSize)); 1218 if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) 1219 EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno)); 1220 outFileSize += headerSize; 1221 1222 /* Read first block */ 1223 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); 1224 inFileSize += readSize; 1225 1226 /* Main Loop */ 1227 while (readSize>0) { 1228 size_t const outSize = LZ4F_compressUpdate(ctx, 1229 ress->dstBuffer, ress->dstBufferSize, 1230 ress->srcBuffer, readSize, NULL); 1231 if (LZ4F_isError(outSize)) 1232 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", 1233 srcFileName, LZ4F_getErrorName(outSize)); 1234 outFileSize += outSize; 1235 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { 1236 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 1237 (unsigned)(inFileSize>>20), 1238 
(double)outFileSize/inFileSize*100) 1239 } else { 1240 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 1241 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 1242 (double)outFileSize/inFileSize*100); 1243 } 1244 1245 /* Write Block */ 1246 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); 1247 if (sizeCheck != outSize) 1248 EXM_THROW(36, "Write error : %s", strerror(errno)); 1249 } 1250 1251 /* Read next block */ 1252 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); 1253 inFileSize += readSize; 1254 } 1255 if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); 1256 1257 /* End of Stream mark */ 1258 headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); 1259 if (LZ4F_isError(headerSize)) 1260 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", 1261 srcFileName, LZ4F_getErrorName(headerSize)); 1262 1263 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); 1264 if (sizeCheck != headerSize) 1265 EXM_THROW(39, "Write error : %s (cannot write end of stream)", 1266 strerror(errno)); 1267 } 1268 outFileSize += headerSize; 1269 } 1270 1271 *readsize = inFileSize; 1272 LZ4F_freeCompressionContext(ctx); 1273 1274 return outFileSize; 1275 } 1276 #endif 1277 1278 1279 static unsigned long long 1280 FIO_compressZstdFrame(FIO_ctx_t* const fCtx, 1281 FIO_prefs_t* const prefs, 1282 const cRess_t* ressPtr, 1283 const char* srcFileName, U64 fileSize, 1284 int compressionLevel, U64* readsize) 1285 { 1286 cRess_t const ress = *ressPtr; 1287 FILE* const srcFile = ress.srcFile; 1288 FILE* const dstFile = ress.dstFile; 1289 U64 compressedfilesize = 0; 1290 ZSTD_EndDirective directive = ZSTD_e_continue; 1291 1292 /* stats */ 1293 ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 }; 1294 ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 }; 1295 typedef enum { noChange, slower, faster } speedChange_e; 
1296 speedChange_e speedChange = noChange; 1297 unsigned flushWaiting = 0; 1298 unsigned inputPresented = 0; 1299 unsigned inputBlocked = 0; 1300 unsigned lastJobID = 0; 1301 1302 DISPLAYLEVEL(6, "compression using zstd format \n"); 1303 1304 /* init */ 1305 if (fileSize != UTIL_FILESIZE_UNKNOWN) { 1306 CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); 1307 } else if (prefs->streamSrcSize > 0) { 1308 /* unknown source size; use the declared stream size */ 1309 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); 1310 } 1311 (void)srcFileName; 1312 1313 /* Main compression loop */ 1314 do { 1315 size_t stillToFlush; 1316 /* Fill input Buffer */ 1317 size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); 1318 ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; 1319 DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize); 1320 *readsize += inSize; 1321 1322 if ((inSize == 0) || (*readsize == fileSize)) 1323 directive = ZSTD_e_end; 1324 1325 stillToFlush = 1; 1326 while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */ 1327 || (directive == ZSTD_e_end && stillToFlush != 0) ) { 1328 1329 size_t const oldIPos = inBuff.pos; 1330 ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; 1331 size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx); 1332 CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive)); 1333 1334 /* count stats */ 1335 inputPresented++; 1336 if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */ 1337 if (!toFlushNow) flushWaiting = 1; 1338 1339 /* Write compressed stream */ 1340 DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n", 1341 (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); 1342 if (outBuff.pos) { 1343 size_t const sizeCheck = 
fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); 1344 if (sizeCheck != outBuff.pos) 1345 EXM_THROW(25, "Write error : %s (cannot write compressed block)", 1346 strerror(errno)); 1347 compressedfilesize += outBuff.pos; 1348 } 1349 1350 /* display notification; and adapt compression level */ 1351 if (READY_FOR_UPDATE()) { 1352 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); 1353 double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; 1354 1355 /* display progress notifications */ 1356 if (g_display_prefs.displayLevel >= 3) { 1357 DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ", 1358 compressionLevel, 1359 (unsigned)((zfp.ingested - zfp.consumed) >> 20), 1360 (unsigned)(zfp.consumed >> 20), 1361 (unsigned)(zfp.produced >> 20), 1362 cShare ); 1363 } else { /* summarized notifications if == 2 */ 1364 DISPLAYLEVEL(2, "\r%79s\r", ""); /* Clear out the current displayed line */ 1365 if (fCtx->nbFilesTotal > 1) { 1366 size_t srcFileNameSize = strlen(srcFileName); 1367 /* Ensure that the string we print is roughly the same size each time */ 1368 if (srcFileNameSize > 18) { 1369 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; 1370 DISPLAYLEVEL(2, "Compress: %u/%u files. Current: ...%s ", 1371 fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); 1372 } else { 1373 DISPLAYLEVEL(2, "Compress: %u/%u files. 
Current: %*s ", 1374 fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); 1375 } 1376 } 1377 DISPLAYLEVEL(2, "Read : %2u ", (unsigned)(zfp.consumed >> 20)); 1378 if (fileSize != UTIL_FILESIZE_UNKNOWN) 1379 DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20)); 1380 DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare); 1381 DELAY_NEXT_UPDATE(); 1382 } 1383 1384 /* adaptive mode : statistics measurement and speed correction */ 1385 if (prefs->adaptiveMode) { 1386 1387 /* check output speed */ 1388 if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ 1389 1390 unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; 1391 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; 1392 assert(zfp.produced >= previous_zfp_update.produced); 1393 assert(prefs->nbWorkers >= 1); 1394 1395 /* test if compression is blocked 1396 * either because output is slow and all buffers are full 1397 * or because input is slow and no job can start while waiting for at least one buffer to be filled. 
1398 * note : exclude starting part, since currentJobID > 1 */ 1399 if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ 1400 && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ 1401 ) { 1402 DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") 1403 speedChange = slower; 1404 } 1405 1406 previous_zfp_update = zfp; 1407 1408 if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ 1409 && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ 1410 ) { 1411 DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); 1412 speedChange = slower; 1413 } 1414 flushWaiting = 0; 1415 } 1416 1417 /* course correct only if there is at least one new job completed */ 1418 if (zfp.currentJobID > lastJobID) { 1419 DISPLAYLEVEL(6, "compression level adaptation check \n") 1420 1421 /* check input speed */ 1422 if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ 1423 if (inputBlocked <= 0) { 1424 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); 1425 speedChange = slower; 1426 } else if (speedChange == noChange) { 1427 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; 1428 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; 1429 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; 1430 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; 1431 previous_zfp_correction = zfp; 1432 assert(inputPresented > 
0); 1433 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", 1434 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, 1435 (unsigned)newlyIngested, (unsigned)newlyConsumed, 1436 (unsigned)newlyFlushed, (unsigned)newlyProduced); 1437 if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ 1438 && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ 1439 && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ 1440 ) { 1441 DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", 1442 newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); 1443 speedChange = faster; 1444 } 1445 } 1446 inputBlocked = 0; 1447 inputPresented = 0; 1448 } 1449 1450 if (speedChange == slower) { 1451 DISPLAYLEVEL(6, "slower speed , higher compression \n") 1452 compressionLevel ++; 1453 if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); 1454 if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; 1455 compressionLevel += (compressionLevel == 0); /* skip 0 */ 1456 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); 1457 } 1458 if (speedChange == faster) { 1459 DISPLAYLEVEL(6, "faster speed , lighter compression \n") 1460 compressionLevel --; 1461 if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; 1462 compressionLevel -= (compressionLevel == 0); /* skip 0 */ 1463 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); 1464 } 1465 speedChange = noChange; 1466 1467 lastJobID = zfp.currentJobID; 1468 } /* if (zfp.currentJobID > lastJobID) */ 1469 } /* if (g_adaptiveMode) */ 1470 } /* if (READY_FOR_UPDATE()) */ 1471 } /* while ((inBuff.pos != inBuff.size) */ 1472 } while (directive != ZSTD_e_end); 1473 1474 
if (ferror(srcFile)) { 1475 EXM_THROW(26, "Read error : I/O error"); 1476 } 1477 if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { 1478 EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", 1479 (unsigned long long)*readsize, (unsigned long long)fileSize); 1480 } 1481 1482 return compressedfilesize; 1483 } 1484 1485 /*! FIO_compressFilename_internal() : 1486 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. 1487 * @return : 0 : compression completed correctly, 1488 * 1 : missing or pb opening srcFileName 1489 */ 1490 static int 1491 FIO_compressFilename_internal(FIO_ctx_t* const fCtx, 1492 FIO_prefs_t* const prefs, 1493 cRess_t ress, 1494 const char* dstFileName, const char* srcFileName, 1495 int compressionLevel) 1496 { 1497 UTIL_time_t const timeStart = UTIL_getTime(); 1498 clock_t const cpuStart = clock(); 1499 U64 readsize = 0; 1500 U64 compressedfilesize = 0; 1501 U64 const fileSize = UTIL_getFileSize(srcFileName); 1502 DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize); 1503 1504 /* compression format selection */ 1505 switch (prefs->compressionType) { 1506 default: 1507 case FIO_zstdCompression: 1508 compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); 1509 break; 1510 1511 case FIO_gzipCompression: 1512 #ifdef ZSTD_GZCOMPRESS 1513 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 1514 #else 1515 (void)compressionLevel; 1516 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", 1517 srcFileName); 1518 #endif 1519 break; 1520 1521 case FIO_xzCompression: 1522 case FIO_lzmaCompression: 1523 #ifdef ZSTD_LZMACOMPRESS 1524 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression); 1525 #else 1526 (void)compressionLevel; 1527 
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", 1528 srcFileName); 1529 #endif 1530 break; 1531 1532 case FIO_lz4Compression: 1533 #ifdef ZSTD_LZ4COMPRESS 1534 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize); 1535 #else 1536 (void)compressionLevel; 1537 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", 1538 srcFileName); 1539 #endif 1540 break; 1541 } 1542 1543 /* Status */ 1544 fCtx->totalBytesInput += (size_t)readsize; 1545 fCtx->totalBytesOutput += (size_t)compressedfilesize; 1546 DISPLAYLEVEL(2, "\r%79s\r", ""); 1547 if (g_display_prefs.displayLevel >= 2 && 1548 !fCtx->hasStdoutOutput && 1549 (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { 1550 if (readsize == 0) { 1551 DISPLAYLEVEL(2,"%-20s : (%6llu => %6llu bytes, %s) \n", 1552 srcFileName, 1553 (unsigned long long)readsize, (unsigned long long) compressedfilesize, 1554 dstFileName); 1555 } else { 1556 DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", 1557 srcFileName, 1558 (double)compressedfilesize / readsize * 100, 1559 (unsigned long long)readsize, (unsigned long long) compressedfilesize, 1560 dstFileName); 1561 } 1562 } 1563 1564 /* Elapsed Time and CPU Load */ 1565 { clock_t const cpuEnd = clock(); 1566 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; 1567 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); 1568 double const timeLength_s = (double)timeLength_ns / 1000000000; 1569 double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100; 1570 DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n", 1571 srcFileName, timeLength_s, cpuLoad_pct); 1572 } 1573 return 0; 1574 } 1575 1576 1577 /*! 
FIO_compressFilename_dstFile() : 1578 * open dstFileName, or pass-through if ress.dstFile != NULL, 1579 * then start compression with FIO_compressFilename_internal(). 1580 * Manages source removal (--rm) and file permissions transfer. 1581 * note : ress.srcFile must be != NULL, 1582 * so reach this function through FIO_compressFilename_srcFile(). 1583 * @return : 0 : compression completed correctly, 1584 * 1 : pb 1585 */ 1586 static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, 1587 FIO_prefs_t* const prefs, 1588 cRess_t ress, 1589 const char* dstFileName, 1590 const char* srcFileName, 1591 int compressionLevel) 1592 { 1593 int closeDstFile = 0; 1594 int result; 1595 stat_t statbuf; 1596 int transfer_permissions = 0; 1597 assert(ress.srcFile != NULL); 1598 if (ress.dstFile == NULL) { 1599 closeDstFile = 1; 1600 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); 1601 ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName); 1602 if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ 1603 /* Must only be added after FIO_openDstFile() succeeds. 1604 * Otherwise we may delete the destination file if it already exists, 1605 * and the user presses Ctrl-C when asked if they wish to overwrite. 
1606 */ 1607 addHandler(dstFileName); 1608 1609 if ( strcmp (srcFileName, stdinmark) 1610 && UTIL_stat(srcFileName, &statbuf) 1611 && UTIL_isRegularFileStat(&statbuf) ) 1612 transfer_permissions = 1; 1613 } 1614 1615 result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); 1616 1617 if (closeDstFile) { 1618 FILE* const dstFile = ress.dstFile; 1619 ress.dstFile = NULL; 1620 1621 clearHandler(); 1622 1623 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName); 1624 if (fclose(dstFile)) { /* error closing dstFile */ 1625 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 1626 result=1; 1627 } 1628 if ( (result != 0) /* operation failure */ 1629 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ 1630 ) { 1631 FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ 1632 } else if (transfer_permissions) { 1633 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transferring permissions into dst: %s \n", dstFileName); 1634 UTIL_setFileStat(dstFileName, &statbuf); 1635 } else { 1636 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName); 1637 } 1638 } 1639 1640 return result; 1641 } 1642 1643 /* List used to compare file extensions (used with --exclude-compressed flag) 1644 * Different from the suffixList and should only apply to ZSTD compress operationResult 1645 */ 1646 static const char *compressedFileExtensions[] = { 1647 ZSTD_EXTENSION, 1648 TZSTD_EXTENSION, 1649 GZ_EXTENSION, 1650 TGZ_EXTENSION, 1651 LZMA_EXTENSION, 1652 XZ_EXTENSION, 1653 TXZ_EXTENSION, 1654 LZ4_EXTENSION, 1655 TLZ4_EXTENSION, 1656 NULL 1657 }; 1658 1659 /*! 
FIO_compressFilename_srcFile() : 1660 * @return : 0 : compression completed correctly, 1661 * 1 : missing or pb opening srcFileName 1662 */ 1663 static int 1664 FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, 1665 FIO_prefs_t* const prefs, 1666 cRess_t ress, 1667 const char* dstFileName, 1668 const char* srcFileName, 1669 int compressionLevel) 1670 { 1671 int result; 1672 DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName); 1673 1674 /* ensure src is not a directory */ 1675 if (UTIL_isDirectory(srcFileName)) { 1676 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 1677 return 1; 1678 } 1679 1680 /* ensure src is not the same as dict (if present) */ 1681 if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) { 1682 DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); 1683 return 1; 1684 } 1685 1686 /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used 1687 * YES => ZSTD will skip compression of the file and will return 0. 1688 * NO => ZSTD will resume with compress operation. 1689 */ 1690 if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { 1691 DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); 1692 return 0; 1693 } 1694 1695 ress.srcFile = FIO_openSrcFile(srcFileName); 1696 if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ 1697 1698 result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); 1699 1700 fclose(ress.srcFile); 1701 ress.srcFile = NULL; 1702 if ( prefs->removeSrcFile /* --rm */ 1703 && result == 0 /* success */ 1704 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ 1705 ) { 1706 /* We must clear the handler, since after this point calling it would 1707 * delete both the source and destination files. 
1708 */ 1709 clearHandler(); 1710 if (FIO_removeFile(srcFileName)) 1711 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); 1712 } 1713 return result; 1714 } 1715 1716 int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, 1717 const char* srcFileName, const char* dictFileName, 1718 int compressionLevel, ZSTD_compressionParameters comprParams) 1719 { 1720 cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); 1721 int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); 1722 1723 #define DISPLAY_LEVEL_DEFAULT 2 1724 1725 FIO_freeCResources(&ress); 1726 return result; 1727 } 1728 1729 /* FIO_determineCompressedName() : 1730 * create a destination filename for compressed srcFileName. 1731 * @return a pointer to it. 1732 * This function never returns an error (it may abort() in case of pb) 1733 */ 1734 static const char* 1735 FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix) 1736 { 1737 static size_t dfnbCapacity = 0; 1738 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ 1739 char* outDirFilename = NULL; 1740 size_t sfnSize = strlen(srcFileName); 1741 size_t const srcSuffixLen = strlen(suffix); 1742 if (outDirName) { 1743 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); 1744 sfnSize = strlen(outDirFilename); 1745 assert(outDirFilename != NULL); 1746 } 1747 1748 if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { 1749 /* resize buffer for dstName */ 1750 free(dstFileNameBuffer); 1751 dfnbCapacity = sfnSize + srcSuffixLen + 30; 1752 dstFileNameBuffer = (char*)malloc(dfnbCapacity); 1753 if (!dstFileNameBuffer) { 1754 EXM_THROW(30, "zstd: %s", strerror(errno)); 1755 } 1756 } 1757 assert(dstFileNameBuffer != NULL); 1758 1759 if (outDirFilename) { 1760 
memcpy(dstFileNameBuffer, outDirFilename, sfnSize); 1761 free(outDirFilename); 1762 } else { 1763 memcpy(dstFileNameBuffer, srcFileName, sfnSize); 1764 } 1765 memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); 1766 return dstFileNameBuffer; 1767 } 1768 1769 static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles) 1770 { 1771 size_t i; 1772 unsigned long long fileSize, maxFileSize = 0; 1773 for (i = 0; i < nbFiles; i++) { 1774 fileSize = UTIL_getFileSize(inFileNames[i]); 1775 maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize; 1776 } 1777 return maxFileSize; 1778 } 1779 1780 /* FIO_compressMultipleFilenames() : 1781 * compress nbFiles files 1782 * into either one destination (outFileName), 1783 * or into one file each (outFileName == NULL, but suffix != NULL), 1784 * or into a destination folder (specified with -O) 1785 */ 1786 int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, 1787 FIO_prefs_t* const prefs, 1788 const char** inFileNamesTable, 1789 const char* outMirroredRootDirName, 1790 const char* outDirName, 1791 const char* outFileName, const char* suffix, 1792 const char* dictFileName, int compressionLevel, 1793 ZSTD_compressionParameters comprParams) 1794 { 1795 int status; 1796 int error = 0; 1797 cRess_t ress = FIO_createCResources(prefs, dictFileName, 1798 FIO_getLargestFileSize(inFileNamesTable, fCtx->nbFilesTotal), 1799 compressionLevel, comprParams); 1800 1801 /* init */ 1802 assert(outFileName != NULL || suffix != NULL); 1803 if (outFileName != NULL) { /* output into a single destination (stdout typically) */ 1804 if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { 1805 FIO_freeCResources(&ress); 1806 return 1; 1807 } 1808 ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName); 1809 if (ress.dstFile == NULL) { /* could not open outFileName */ 1810 error = 1; 1811 } else { 1812 for (; fCtx->currFileIdx < 
fCtx->nbFilesTotal; ++fCtx->currFileIdx) { 1813 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); 1814 if (!status) fCtx->nbFilesProcessed++; 1815 error |= status; 1816 } 1817 if (fclose(ress.dstFile)) 1818 EXM_THROW(29, "Write error (%s) : cannot properly close %s", 1819 strerror(errno), outFileName); 1820 ress.dstFile = NULL; 1821 } 1822 } else { 1823 if (outMirroredRootDirName) 1824 UTIL_mirrorSourceFilesDirectories(inFileNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); 1825 1826 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { 1827 const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx]; 1828 const char* dstFileName = NULL; 1829 if (outMirroredRootDirName) { 1830 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); 1831 if (validMirroredDirName) { 1832 dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix); 1833 free(validMirroredDirName); 1834 } else { 1835 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName); 1836 error=1; 1837 continue; 1838 } 1839 } else { 1840 dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ 1841 } 1842 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); 1843 if (!status) fCtx->nbFilesProcessed++; 1844 error |= status; 1845 } 1846 1847 if (outDirName) 1848 FIO_checkFilenameCollisions(inFileNamesTable , fCtx->nbFilesTotal); 1849 } 1850 1851 if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { 1852 DISPLAYLEVEL(2, "\r%79s\r", ""); 1853 DISPLAYLEVEL(2, "%d files compressed : %.2f%% (%6zu => %6zu bytes)\n", fCtx->nbFilesProcessed, 1854 (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, 1855 fCtx->totalBytesInput, fCtx->totalBytesOutput); 1856 } 
1857 1858 FIO_freeCResources(&ress); 1859 return error; 1860 } 1861 1862 #endif /* #ifndef ZSTD_NOCOMPRESS */ 1863 1864 1865 1866 #ifndef ZSTD_NODECOMPRESS 1867 1868 /* ************************************************************************** 1869 * Decompression 1870 ***************************************************************************/ 1871 typedef struct { 1872 void* srcBuffer; 1873 size_t srcBufferSize; 1874 size_t srcBufferLoaded; 1875 void* dstBuffer; 1876 size_t dstBufferSize; 1877 ZSTD_DStream* dctx; 1878 FILE* dstFile; 1879 } dRess_t; 1880 1881 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) 1882 { 1883 dRess_t ress; 1884 memset(&ress, 0, sizeof(ress)); 1885 1886 if (prefs->patchFromMode) 1887 FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */); 1888 1889 /* Allocation */ 1890 ress.dctx = ZSTD_createDStream(); 1891 if (ress.dctx==NULL) 1892 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); 1893 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); 1894 CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); 1895 1896 ress.srcBufferSize = ZSTD_DStreamInSize(); 1897 ress.srcBuffer = malloc(ress.srcBufferSize); 1898 ress.dstBufferSize = ZSTD_DStreamOutSize(); 1899 ress.dstBuffer = malloc(ress.dstBufferSize); 1900 if (!ress.srcBuffer || !ress.dstBuffer) 1901 EXM_THROW(61, "Allocation error : not enough memory"); 1902 1903 /* dictionary */ 1904 { void* dictBuffer; 1905 size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); 1906 CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); 1907 free(dictBuffer); 1908 } 1909 1910 return ress; 1911 } 1912 1913 static void FIO_freeDResources(dRess_t ress) 1914 { 1915 CHECK( ZSTD_freeDStream(ress.dctx) ); 1916 free(ress.srcBuffer); 1917 free(ress.dstBuffer); 1918 } 1919 1920 1921 /** FIO_fwriteSparse() 
:
 *  Write `bufferSize` bytes of `buffer` into `file`.
 *  - test mode : nothing is written, returns 0.
 *  - sparse support disabled : plain fwrite(), returns 0.
 *  - sparse support enabled : the buffer is scanned by segments of 32 KB;
 *    the zero words found at the beginning of each segment are not written
 *    but accumulated as a pending skip, which is converted into a forward
 *    seek as soon as non-zero data must be written.
 *  `storedSkips` is the number of bytes still pending from previous calls.
 *  Write and seek failures go through EXM_THROW().
 *  @return : storedSkips,
 *            argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
static unsigned
FIO_fwriteSparse(FILE* file,
                 const void* buffer, size_t bufferSize,
                 const FIO_prefs_t* const prefs,
                 unsigned storedSkips)
{
    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
    size_t bufferSizeT = bufferSize / sizeof(size_t);      /* nb of complete size_t words; decremented as segments are consumed */
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t* ptrT = bufferT;
    static const size_t segmentSizeT = (32 KB) / sizeof(size_t);   /* check every 32 KB */

    if (prefs->testMode) return 0;  /* do not output anything in test mode */

    if (!prefs->sparseFileSupport) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
            EXM_THROW(70, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        return 0;
    }

    /* avoid int overflow */
    /* flush 1 GB worth of pending skip right away, so the counter keeps headroom */
    if (storedSkips > 1 GB) {
        if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0)
            EXM_THROW(91, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    while (ptrT < bufferTEnd) {
        size_t nb0T;

        /* adjust last segment if < 32 KB */
        size_t seg0SizeT = segmentSizeT;
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
        bufferSizeT -= seg0SizeT;

        /* count leading zeroes */
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeof(size_t));

        if (nb0T != seg0SizeT) {   /* not all 0s */
            size_t const nbNon0ST = seg0SizeT - nb0T;
            /* skip leading zeros */
            if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                EXM_THROW(92, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            /* write the rest */
            /* note : zeroes located after the first non-zero word of a segment are written normally */
            if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
                EXM_THROW(93, "Write error : cannot write decoded block : %s",
                            strerror(errno));
        }
        ptrT += seg0SizeT;
    }

    /* trailing bytes (fewer than sizeof(size_t)) : same logic, byte by byte */
    {   static size_t const maskT = sizeof(size_t)-1;
        if (bufferSize & maskT) {
            /* size not multiple of sizeof(size_t) : implies end of block */
            const char* const restStart = (const char*)bufferTEnd;
            const char* restPtr = restStart;
            const char* const restEnd = (const char*)buffer + bufferSize;
            assert(restEnd > restStart && restEnd < restStart + sizeof(size_t));
            for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
            storedSkips += (unsigned) (restPtr - restStart);
            if (restPtr != restEnd) {
                /* not all remaining bytes are 0 */
                size_t const restSize = (size_t)(restEnd - restPtr);
                if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
                    EXM_THROW(92, "Sparse skip error ; try --no-sparse");
                if (fwrite(restPtr, 1, restSize, file) != restSize)
                    EXM_THROW(95, "Write error : cannot write end of decoded block : %s",
                        strerror(errno));
                storedSkips = 0;
    }   }   }

    return storedSkips;
}

/* FIO_fwriteSparseEnd() :
 * Materialize the skip still pending after the last FIO_fwriteSparse() call :
 * seek forward by (storedSkips-1) bytes, then write one explicit zero byte.
 * Does nothing when storedSkips == 0.
 * Seek and write failures go through EXM_THROW(). */
static void
FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
{
    if (prefs->testMode) assert(storedSkips == 0);   /* FIO_fwriteSparse() always returns 0 in test mode */
    if (storedSkips>0) {
        assert(prefs->sparseFileSupport > 0);  /* storedSkips>0 implies sparse support is enabled */
        (void)prefs;   /* assert can be disabled, in which case prefs becomes unused */
        if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
            EXM_THROW(69, "Final skip error (sparse file support)");
        /* last zero must be explicitly written,
         * so that skipped ones get implicitly translated as zero by FS */
        {   const char lastZeroByte[1] = { 0 };
            if (fwrite(lastZeroByte, 1, 1, file) != 1)
                EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
    }   }
}


/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
    The first `alreadyLoaded` bytes of `buffer` were read from `finput` earlier
    and are written first; the rest of `finput` is then copied by blocks of
    at most 64 KB, using `buffer` as scratch space.
    @return : 0 (no error),
              1 (write or read error, already reported on stderr) */
static int FIO_passThrough(const FIO_prefs_t* const prefs,
                           FILE* foutput, FILE* finput,
                           void* buffer, size_t bufferSize,
                           size_t alreadyLoaded)
{
    size_t const blockSize = MIN(64 KB, bufferSize);
    size_t readFromInput;
    unsigned storedSkips = 0;

    /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
    {   size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
        if (sizeCheck != alreadyLoaded) {
            DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
            return 1;
    }   }

    /* a short read (< blockSize) means end of input or read error : told apart just below */
    do {
        readFromInput = fread(buffer, 1, blockSize, finput);
        storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
    } while (readFromInput == blockSize);
    if (ferror(finput)) {
        DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
        return 1;
    }
    assert(feof(finput));

    FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
    return 0;
}

/* FIO_zstdErrorHelp() :
 * detailed error message when requested window size is too large.
 * Silent for any other error code.
 * The frame header is re-parsed from ress->srcBuffer in order to suggest
 * suitable --long= / --memory= values. */
static void
FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
                  const dRess_t* ress,
                  size_t err, const char* srcFileName)
{
    ZSTD_frameHeader header;

    /* Help message only for one specific error */
    if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
        return;

    /* Try to decode the frame header */
    err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
    if (err == 0) {
        unsigned long long const windowSize = header.windowSize;
        /* +1 when windowSize is not a power of 2
         * (presumably FIO_highbit64() gives the position of the highest set bit, making this a rounded-up log2 -- confirm) */
        unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
        assert(prefs->memLimit > 0);
        DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
                        srcFileName, windowSize, prefs->memLimit);
        if (windowLog <= ZSTD_WINDOWLOG_MAX) {
            /* window size in MB, rounded up */
            unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
            assert(windowSize < (U64)(1ULL << 52));   /* ensure no overflow for windowMB */
            DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
                            srcFileName, windowLog, windowMB);
            return;
    }   }
    /* reached when the header cannot be decoded, or when the window is beyond the supported maximum */
    DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
                    srcFileName, ZSTD_WINDOWLOG_MAX);
}

/** FIO_decompressZstdFrame() :
 *  Decode one zstd frame, reading from `finput` through ress->srcBuffer
 *  and writing into ress->dstFile through FIO_fwriteSparse().
 *  Input bytes left unconsumed at the end of the frame remain at the
 *  beginning of ress->srcBuffer (ress->srcBufferLoaded is updated).
 *  `alreadyDecoded` is only used for the progress display.
 *  @return : size of decoded zstd frame, or an error code
 *            (FIO_ERROR_FRAME_DECODING)
 */
#define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
static unsigned long long
FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
                        const FIO_prefs_t* const prefs,
                        const char* srcFileName,
                        U64 alreadyDecoded)  /* for multi-frames streams */
{
    U64 frameSize = 0;
    U32 storedSkips = 0;

    /* display last 20 characters only */
    {   size_t const srcFileLength = strlen(srcFileName);
        if (srcFileLength>20) srcFileName += srcFileLength-20;
    }

    ZSTD_resetDStream(ress->dctx);

    /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
    {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
        if (ress->srcBufferLoaded < toDecode) {
            size_t const toRead = toDecode - ress->srcBufferLoaded;
            void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
            ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
    }   }

    /* Main decompression Loop */
    while (1) {
        ZSTD_inBuffer  inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
        ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
        size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
        if (ZSTD_isError(readSizeHint)) {
            DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
                            srcFileName, ZSTD_getErrorName(readSizeHint));
            FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
            return FIO_ERROR_FRAME_DECODING;
        }

        /* Write block */
        storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
        frameSize += outBuff.pos;
        /* progress notification, not displayed when output goes to stdout */
        if (!fCtx->hasStdoutOutput) {
            if (fCtx->nbFilesTotal > 1) {
                size_t srcFileNameSize = strlen(srcFileName);
                if (srcFileNameSize > 18) {
                    const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
                    DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: ...%s : %u MB... ",
                                    fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
                } else {
                    DISPLAYUPDATE(2, "\rDecompress: %2u/%2u files. Current: %s : %u MB... ",
                                    fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
                }
            } else {
                DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ",
                                srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
            }
        }

        /* move unconsumed input back to the beginning of srcBuffer */
        if (inBuff.pos > 0) {
            memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
            ress->srcBufferLoaded -= inBuff.pos;
        }

        if (readSizeHint == 0) break;   /* end of frame */

        /* Fill input buffer */
        {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
            if (ress->srcBufferLoaded < toDecode) {
                size_t const toRead = toDecode - ress->srcBufferLoaded;   /* > 0 */
                void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
                size_t const readSize = fread(startPosition, 1, toRead, finput);
                if (readSize==0) {
                    /* the decoder expects more input, but none is available */
                    DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
                                    srcFileName);
                    return FIO_ERROR_FRAME_DECODING;
                }
                ress->srcBufferLoaded += readSize;
    }   }   }

    FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);

    return frameSize;
}


#ifdef ZSTD_GZDECOMPRESS
2172 static unsigned long long 2173 FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, 2174 const FIO_prefs_t* const prefs, 2175 const char* srcFileName) 2176 { 2177 unsigned long long outFileSize = 0; 2178 z_stream strm; 2179 int flush = Z_NO_FLUSH; 2180 int decodingError = 0; 2181 unsigned storedSkips = 0; 2182 2183 strm.zalloc = Z_NULL; 2184 strm.zfree = Z_NULL; 2185 strm.opaque = Z_NULL; 2186 strm.next_in = 0; 2187 strm.avail_in = 0; 2188 /* see http://www.zlib.net/manual.html */ 2189 if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) 2190 return FIO_ERROR_FRAME_DECODING; 2191 2192 strm.next_out = (Bytef*)ress->dstBuffer; 2193 strm.avail_out = (uInt)ress->dstBufferSize; 2194 strm.avail_in = (uInt)ress->srcBufferLoaded; 2195 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 2196 2197 for ( ; ; ) { 2198 int ret; 2199 if (strm.avail_in == 0) { 2200 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 2201 if (ress->srcBufferLoaded == 0) flush = Z_FINISH; 2202 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 2203 strm.avail_in = (uInt)ress->srcBufferLoaded; 2204 } 2205 ret = inflate(&strm, flush); 2206 if (ret == Z_BUF_ERROR) { 2207 DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); 2208 decodingError = 1; break; 2209 } 2210 if (ret != Z_OK && ret != Z_STREAM_END) { 2211 DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); 2212 decodingError = 1; break; 2213 } 2214 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 2215 if (decompBytes) { 2216 storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); 2217 outFileSize += decompBytes; 2218 strm.next_out = (Bytef*)ress->dstBuffer; 2219 strm.avail_out = (uInt)ress->dstBufferSize; 2220 } 2221 } 2222 if (ret == Z_STREAM_END) break; 2223 } 2224 2225 if (strm.avail_in > 0) 2226 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 2227 ress->srcBufferLoaded = 
strm.avail_in; 2228 if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ 2229 && (decodingError==0) ) { 2230 DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); 2231 decodingError = 1; 2232 } 2233 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); 2234 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; 2235 } 2236 #endif 2237 2238 2239 #ifdef ZSTD_LZMADECOMPRESS 2240 static unsigned long long 2241 FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, 2242 const FIO_prefs_t* const prefs, 2243 const char* srcFileName, int plain_lzma) 2244 { 2245 unsigned long long outFileSize = 0; 2246 lzma_stream strm = LZMA_STREAM_INIT; 2247 lzma_action action = LZMA_RUN; 2248 lzma_ret initRet; 2249 int decodingError = 0; 2250 unsigned storedSkips = 0; 2251 2252 strm.next_in = 0; 2253 strm.avail_in = 0; 2254 if (plain_lzma) { 2255 initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ 2256 } else { 2257 initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ 2258 } 2259 2260 if (initRet != LZMA_OK) { 2261 DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", 2262 plain_lzma ? 
"lzma_alone_decoder" : "lzma_stream_decoder", 2263 srcFileName, initRet); 2264 return FIO_ERROR_FRAME_DECODING; 2265 } 2266 2267 strm.next_out = (BYTE*)ress->dstBuffer; 2268 strm.avail_out = ress->dstBufferSize; 2269 strm.next_in = (BYTE const*)ress->srcBuffer; 2270 strm.avail_in = ress->srcBufferLoaded; 2271 2272 for ( ; ; ) { 2273 lzma_ret ret; 2274 if (strm.avail_in == 0) { 2275 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 2276 if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; 2277 strm.next_in = (BYTE const*)ress->srcBuffer; 2278 strm.avail_in = ress->srcBufferLoaded; 2279 } 2280 ret = lzma_code(&strm, action); 2281 2282 if (ret == LZMA_BUF_ERROR) { 2283 DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); 2284 decodingError = 1; break; 2285 } 2286 if (ret != LZMA_OK && ret != LZMA_STREAM_END) { 2287 DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", 2288 srcFileName, ret); 2289 decodingError = 1; break; 2290 } 2291 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 2292 if (decompBytes) { 2293 storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); 2294 outFileSize += decompBytes; 2295 strm.next_out = (BYTE*)ress->dstBuffer; 2296 strm.avail_out = ress->dstBufferSize; 2297 } } 2298 if (ret == LZMA_STREAM_END) break; 2299 } 2300 2301 if (strm.avail_in > 0) 2302 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 2303 ress->srcBufferLoaded = strm.avail_in; 2304 lzma_end(&strm); 2305 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); 2306 return decodingError ? 
FIO_ERROR_FRAME_DECODING : outFileSize; 2307 } 2308 #endif 2309 2310 #ifdef ZSTD_LZ4DECOMPRESS 2311 static unsigned long long 2312 FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, 2313 const FIO_prefs_t* const prefs, 2314 const char* srcFileName) 2315 { 2316 unsigned long long filesize = 0; 2317 LZ4F_errorCode_t nextToLoad; 2318 LZ4F_decompressionContext_t dCtx; 2319 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); 2320 int decodingError = 0; 2321 unsigned storedSkips = 0; 2322 2323 if (LZ4F_isError(errorCode)) { 2324 DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); 2325 return FIO_ERROR_FRAME_DECODING; 2326 } 2327 2328 /* Init feed with magic number (already consumed from FILE* sFile) */ 2329 { size_t inSize = 4; 2330 size_t outSize= 0; 2331 MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); 2332 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); 2333 if (LZ4F_isError(nextToLoad)) { 2334 DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", 2335 srcFileName, LZ4F_getErrorName(nextToLoad)); 2336 LZ4F_freeDecompressionContext(dCtx); 2337 return FIO_ERROR_FRAME_DECODING; 2338 } } 2339 2340 /* Main Loop */ 2341 for (;nextToLoad;) { 2342 size_t readSize; 2343 size_t pos = 0; 2344 size_t decodedBytes = ress->dstBufferSize; 2345 2346 /* Read input */ 2347 if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; 2348 readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); 2349 if (!readSize) break; /* reached end of file or stream */ 2350 2351 while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ 2352 /* Decode Input (at least partially) */ 2353 size_t remaining = readSize - pos; 2354 decodedBytes = ress->dstBufferSize; 2355 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); 2356 if (LZ4F_isError(nextToLoad)) { 2357 DISPLAYLEVEL(1, 
"zstd: %s: lz4 decompression error : %s \n", 2358 srcFileName, LZ4F_getErrorName(nextToLoad)); 2359 decodingError = 1; nextToLoad = 0; break; 2360 } 2361 pos += remaining; 2362 2363 /* Write Block */ 2364 if (decodedBytes) { 2365 storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decodedBytes, prefs, storedSkips); 2366 filesize += decodedBytes; 2367 DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20)); 2368 } 2369 2370 if (!nextToLoad) break; 2371 } 2372 } 2373 /* can be out because readSize == 0, which could be an fread() error */ 2374 if (ferror(srcFile)) { 2375 DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); 2376 decodingError=1; 2377 } 2378 2379 if (nextToLoad!=0) { 2380 DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); 2381 decodingError=1; 2382 } 2383 2384 LZ4F_freeDecompressionContext(dCtx); 2385 ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ 2386 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); 2387 2388 return decodingError ? 
FIO_ERROR_FRAME_DECODING : filesize; 2389 } 2390 #endif 2391 2392 2393 2394 /** FIO_decompressFrames() : 2395 * Find and decode frames inside srcFile 2396 * srcFile presumed opened and valid 2397 * @return : 0 : OK 2398 * 1 : error 2399 */ 2400 static int FIO_decompressFrames(FIO_ctx_t* const fCtx, 2401 dRess_t ress, FILE* srcFile, 2402 const FIO_prefs_t* const prefs, 2403 const char* dstFileName, const char* srcFileName) 2404 { 2405 unsigned readSomething = 0; 2406 unsigned long long filesize = 0; 2407 assert(srcFile != NULL); 2408 2409 /* for each frame */ 2410 for ( ; ; ) { 2411 /* check magic number -> version */ 2412 size_t const toRead = 4; 2413 const BYTE* const buf = (const BYTE*)ress.srcBuffer; 2414 if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ 2415 ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, 2416 (size_t)1, toRead - ress.srcBufferLoaded, srcFile); 2417 if (ress.srcBufferLoaded==0) { 2418 if (readSomething==0) { /* srcFile is empty (which is invalid) */ 2419 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); 2420 return 1; 2421 } /* else, just reached frame boundary */ 2422 break; /* no more input */ 2423 } 2424 readSomething = 1; /* there is at least 1 byte in srcFile */ 2425 if (ress.srcBufferLoaded < toRead) { 2426 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); 2427 return 1; 2428 } 2429 if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { 2430 unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize); 2431 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2432 filesize += frameSize; 2433 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ 2434 #ifdef ZSTD_GZDECOMPRESS 2435 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName); 2436 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2437 filesize += frameSize; 2438 #else 2439 DISPLAYLEVEL(1, "zstd: %s: gzip 
file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); 2440 return 1; 2441 #endif 2442 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ 2443 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ 2444 #ifdef ZSTD_LZMADECOMPRESS 2445 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD); 2446 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2447 filesize += frameSize; 2448 #else 2449 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); 2450 return 1; 2451 #endif 2452 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { 2453 #ifdef ZSTD_LZ4DECOMPRESS 2454 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName); 2455 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2456 filesize += frameSize; 2457 #else 2458 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); 2459 return 1; 2460 #endif 2461 } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ 2462 return FIO_passThrough(prefs, 2463 ress.dstFile, srcFile, 2464 ress.srcBuffer, ress.srcBufferSize, 2465 ress.srcBufferLoaded); 2466 } else { 2467 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); 2468 return 1; 2469 } } /* for each frame */ 2470 2471 /* Final Status */ 2472 fCtx->totalBytesOutput += (size_t)filesize; 2473 DISPLAYLEVEL(2, "\r%79s\r", ""); 2474 /* No status message in pipe mode (stdin - stdout) or multi-files mode */ 2475 if (g_display_prefs.displayLevel >= 2) { 2476 if (fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3) { 2477 DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); 2478 } 2479 } 2480 2481 return 0; 2482 } 2483 2484 /** FIO_decompressDstFile() : 2485 open `dstFileName`, 2486 or path-through if 
ress.dstFile is already != 0,
    then start decompression process (FIO_decompressFrames()).
    When the destination is opened here, it is also closed here;
    it is removed if decompression failed (stdout excepted);
    otherwise it receives the file stat of the source when that stat could be read.
    In test mode, no destination is opened.
    @return : 0 : OK
              1 : operation aborted
*/
static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
                                 FIO_prefs_t* const prefs,
                                 dRess_t ress, FILE* srcFile,
                                 const char* dstFileName, const char* srcFileName)
{
    int result;
    stat_t statbuf;
    int transfer_permissions = 0;
    int releaseDstFile = 0;   /* 1 when dstFile is opened by this function, which then owns its closing */

    if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
        releaseDstFile = 1;

        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
        if (ress.dstFile==NULL) return 1;

        /* Must only be added after FIO_openDstFile() succeeds.
         * Otherwise we may delete the destination file if it already exists,
         * and the user presses Ctrl-C when asked if they wish to overwrite.
         */
        addHandler(dstFileName);

        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
          && UTIL_stat(srcFileName, &statbuf)
          && UTIL_isRegularFileStat(&statbuf) )
            transfer_permissions = 1;
    }

    result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);

    if (releaseDstFile) {
        FILE* const dstFile = ress.dstFile;
        /* the handler is cleared before closing : from here, cleanup is handled explicitly below */
        clearHandler();
        ress.dstFile = NULL;
        if (fclose(dstFile)) {
            DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
            result = 1;   /* a failed close counts as a failed decompression */
        }

        if ( (result != 0)  /* operation failure */
          && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
          ) {
            FIO_removeFile(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
        } else if ( transfer_permissions /* file permissions correctly extracted from src */ ) {
            UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
        }
    }

    return result;
}


/**
FIO_decompressSrcFile() : 2544 Open `srcFileName`, transfer control to decompressDstFile() 2545 @return : 0 : OK 2546 1 : error 2547 */ 2548 static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) 2549 { 2550 FILE* srcFile; 2551 int result; 2552 2553 if (UTIL_isDirectory(srcFileName)) { 2554 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 2555 return 1; 2556 } 2557 2558 srcFile = FIO_openSrcFile(srcFileName); 2559 if (srcFile==NULL) return 1; 2560 ress.srcBufferLoaded = 0; 2561 2562 result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName); 2563 2564 /* Close file */ 2565 if (fclose(srcFile)) { 2566 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ 2567 return 1; 2568 } 2569 if ( prefs->removeSrcFile /* --rm */ 2570 && (result==0) /* decompression successful */ 2571 && strcmp(srcFileName, stdinmark) ) /* not stdin */ { 2572 /* We must clear the handler, since after this point calling it would 2573 * delete both the source and destination files. 
2574 */ 2575 clearHandler(); 2576 if (FIO_removeFile(srcFileName)) { 2577 /* failed to remove src file */ 2578 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 2579 return 1; 2580 } } 2581 return result; 2582 } 2583 2584 2585 2586 int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, 2587 const char* dstFileName, const char* srcFileName, 2588 const char* dictFileName) 2589 { 2590 dRess_t const ress = FIO_createDResources(prefs, dictFileName); 2591 2592 int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); 2593 2594 FIO_freeDResources(ress); 2595 return decodingError; 2596 } 2597 2598 static const char *suffixList[] = { 2599 ZSTD_EXTENSION, 2600 TZSTD_EXTENSION, 2601 #ifndef ZSTD_NODECOMPRESS 2602 ZSTD_ALT_EXTENSION, 2603 #endif 2604 #ifdef ZSTD_GZDECOMPRESS 2605 GZ_EXTENSION, 2606 TGZ_EXTENSION, 2607 #endif 2608 #ifdef ZSTD_LZMADECOMPRESS 2609 LZMA_EXTENSION, 2610 XZ_EXTENSION, 2611 TXZ_EXTENSION, 2612 #endif 2613 #ifdef ZSTD_LZ4DECOMPRESS 2614 LZ4_EXTENSION, 2615 TLZ4_EXTENSION, 2616 #endif 2617 NULL 2618 }; 2619 2620 static const char *suffixListStr = 2621 ZSTD_EXTENSION "/" TZSTD_EXTENSION 2622 #ifdef ZSTD_GZDECOMPRESS 2623 "/" GZ_EXTENSION "/" TGZ_EXTENSION 2624 #endif 2625 #ifdef ZSTD_LZMADECOMPRESS 2626 "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION 2627 #endif 2628 #ifdef ZSTD_LZ4DECOMPRESS 2629 "/" LZ4_EXTENSION "/" TLZ4_EXTENSION 2630 #endif 2631 ; 2632 2633 /* FIO_determineDstName() : 2634 * create a destination filename from a srcFileName. 2635 * @return a pointer to it. 
2636 * @return == NULL if there is an error */ 2637 static const char* 2638 FIO_determineDstName(const char* srcFileName, const char* outDirName) 2639 { 2640 static size_t dfnbCapacity = 0; 2641 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ 2642 size_t dstFileNameEndPos; 2643 char* outDirFilename = NULL; 2644 const char* dstSuffix = ""; 2645 size_t dstSuffixLen = 0; 2646 2647 size_t sfnSize = strlen(srcFileName); 2648 2649 size_t srcSuffixLen; 2650 const char* const srcSuffix = strrchr(srcFileName, '.'); 2651 if (srcSuffix == NULL) { 2652 DISPLAYLEVEL(1, 2653 "zstd: %s: unknown suffix (%s expected). " 2654 "Can't derive the output file name. " 2655 "Specify it with -o dstFileName. Ignoring.\n", 2656 srcFileName, suffixListStr); 2657 return NULL; 2658 } 2659 srcSuffixLen = strlen(srcSuffix); 2660 2661 { 2662 const char** matchedSuffixPtr; 2663 for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { 2664 if (!strcmp(*matchedSuffixPtr, srcSuffix)) { 2665 break; 2666 } 2667 } 2668 2669 /* check suffix is authorized */ 2670 if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { 2671 DISPLAYLEVEL(1, 2672 "zstd: %s: unknown suffix (%s expected). " 2673 "Can't derive the output file name. " 2674 "Specify it with -o dstFileName. 
Ignoring.\n", 2675 srcFileName, suffixListStr); 2676 return NULL; 2677 } 2678 2679 if ((*matchedSuffixPtr)[1] == 't') { 2680 dstSuffix = ".tar"; 2681 dstSuffixLen = strlen(dstSuffix); 2682 } 2683 } 2684 2685 if (outDirName) { 2686 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); 2687 sfnSize = strlen(outDirFilename); 2688 assert(outDirFilename != NULL); 2689 } 2690 2691 if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { 2692 /* allocate enough space to write dstFilename into it */ 2693 free(dstFileNameBuffer); 2694 dfnbCapacity = sfnSize + 20; 2695 dstFileNameBuffer = (char*)malloc(dfnbCapacity); 2696 if (dstFileNameBuffer==NULL) 2697 EXM_THROW(74, "%s : not enough memory for dstFileName", 2698 strerror(errno)); 2699 } 2700 2701 /* return dst name == src name truncated from suffix */ 2702 assert(dstFileNameBuffer != NULL); 2703 dstFileNameEndPos = sfnSize - srcSuffixLen; 2704 if (outDirFilename) { 2705 memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); 2706 free(outDirFilename); 2707 } else { 2708 memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); 2709 } 2710 2711 /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" 2712 * extension on decompression. Also writes terminating null. 
*/ 2713 strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); 2714 return dstFileNameBuffer; 2715 2716 /* note : dstFileNameBuffer memory is not going to be free */ 2717 } 2718 2719 int 2720 FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, 2721 FIO_prefs_t* const prefs, 2722 const char** srcNamesTable, 2723 const char* outMirroredRootDirName, 2724 const char* outDirName, const char* outFileName, 2725 const char* dictFileName) 2726 { 2727 int status; 2728 int error = 0; 2729 dRess_t ress = FIO_createDResources(prefs, dictFileName); 2730 2731 if (outFileName) { 2732 if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { 2733 FIO_freeDResources(ress); 2734 return 1; 2735 } 2736 if (!prefs->testMode) { 2737 ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName); 2738 if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); 2739 } 2740 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { 2741 status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); 2742 if (!status) fCtx->nbFilesProcessed++; 2743 error |= status; 2744 } 2745 if ((!prefs->testMode) && (fclose(ress.dstFile))) 2746 EXM_THROW(72, "Write error : %s : cannot properly close output file", 2747 strerror(errno)); 2748 } else { 2749 if (outMirroredRootDirName) 2750 UTIL_mirrorSourceFilesDirectories(srcNamesTable, fCtx->nbFilesTotal, outMirroredRootDirName); 2751 2752 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ 2753 const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; 2754 const char* dstFileName = NULL; 2755 if (outMirroredRootDirName) { 2756 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); 2757 if (validMirroredDirName) { 2758 dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName); 2759 free(validMirroredDirName); 2760 } else { 2761 DISPLAYLEVEL(2, "zstd: 
--output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName); 2762 } 2763 } else { 2764 dstFileName = FIO_determineDstName(srcFileName, outDirName); 2765 } 2766 if (dstFileName == NULL) { error=1; continue; } 2767 status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); 2768 if (!status) fCtx->nbFilesProcessed++; 2769 error |= status; 2770 } 2771 if (outDirName) 2772 FIO_checkFilenameCollisions(srcNamesTable , fCtx->nbFilesTotal); 2773 } 2774 2775 if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) 2776 DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); 2777 2778 FIO_freeDResources(ress); 2779 return error; 2780 } 2781 2782 /* ************************************************************************** 2783 * .zst file info (--list command) 2784 ***************************************************************************/ 2785 2786 typedef struct { 2787 U64 decompressedSize; 2788 U64 compressedSize; 2789 U64 windowSize; 2790 int numActualFrames; 2791 int numSkippableFrames; 2792 int decompUnavailable; 2793 int usesCheck; 2794 U32 nbFiles; 2795 } fileInfo_t; 2796 2797 typedef enum { 2798 info_success=0, 2799 info_frame_error=1, 2800 info_not_zstd=2, 2801 info_file_error=3, 2802 info_truncated_input=4, 2803 } InfoError; 2804 2805 #define ERROR_IF(c,n,...) 
{ \ 2806 if (c) { \ 2807 DISPLAYLEVEL(1, __VA_ARGS__); \ 2808 DISPLAYLEVEL(1, " \n"); \ 2809 return n; \ 2810 } \ 2811 } 2812 2813 static InfoError 2814 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) 2815 { 2816 /* begin analyzing frame */ 2817 for ( ; ; ) { 2818 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 2819 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); 2820 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) { 2821 if ( feof(srcFile) 2822 && (numBytesRead == 0) 2823 && (info->compressedSize > 0) 2824 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { 2825 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile); 2826 unsigned long long file_size = (unsigned long long) info->compressedSize; 2827 ERROR_IF(file_position != file_size, info_truncated_input, 2828 "Error: seeked to position %llu, which is beyond file size of %llu\n", 2829 file_position, 2830 file_size); 2831 break; /* correct end of file => success */ 2832 } 2833 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame"); 2834 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames"); 2835 } 2836 { U32 const magicNumber = MEM_readLE32(headerBuffer); 2837 /* Zstandard frame */ 2838 if (magicNumber == ZSTD_MAGICNUMBER) { 2839 ZSTD_frameHeader header; 2840 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); 2841 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR 2842 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) { 2843 info->decompUnavailable = 1; 2844 } else { 2845 info->decompressedSize += frameContentSize; 2846 } 2847 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, 2848 info_frame_error, "Error: could not decode frame header"); 2849 info->windowSize = header.windowSize; 2850 /* move to the end of the frame header */ 2851 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); 2852 
ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size"); 2853 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0, 2854 info_frame_error, "Error: could not move to end of frame header"); 2855 } 2856 2857 /* skip all blocks in the frame */ 2858 { int lastBlock = 0; 2859 do { 2860 BYTE blockHeaderBuffer[3]; 2861 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3, 2862 info_frame_error, "Error while reading block header"); 2863 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); 2864 U32 const blockTypeID = (blockHeader >> 1) & 3; 2865 U32 const isRLE = (blockTypeID == 1); 2866 U32 const isWrongBlock = (blockTypeID == 3); 2867 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); 2868 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type"); 2869 lastBlock = blockHeader & 1; 2870 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0, 2871 info_frame_error, "Error: could not skip to end of block"); 2872 } 2873 } while (lastBlock != 1); 2874 } 2875 2876 /* check if checksum is used */ 2877 { BYTE const frameHeaderDescriptor = headerBuffer[4]; 2878 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; 2879 if (contentChecksumFlag) { 2880 info->usesCheck = 1; 2881 ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0, 2882 info_frame_error, "Error: could not skip past checksum"); 2883 } } 2884 info->numActualFrames++; 2885 } 2886 /* Skippable frame */ 2887 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 2888 U32 const frameSize = MEM_readLE32(headerBuffer + 4); 2889 long const seek = (long)(8 + frameSize - numBytesRead); 2890 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0, 2891 info_frame_error, "Error: could not find end of skippable frame"); 2892 info->numSkippableFrames++; 2893 } 2894 /* unknown content */ 2895 else { 2896 return info_not_zstd; 2897 } 2898 } /* magic number analysis */ 2899 } /* end analyzing frames */ 2900 
return info_success; 2901 } 2902 2903 2904 static InfoError 2905 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) 2906 { 2907 InfoError status; 2908 FILE* const srcFile = FIO_openSrcFile(inFileName); 2909 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName); 2910 2911 info->compressedSize = UTIL_getFileSize(inFileName); 2912 status = FIO_analyzeFrames(info, srcFile); 2913 2914 fclose(srcFile); 2915 info->nbFiles = 1; 2916 return status; 2917 } 2918 2919 2920 /** getFileInfo() : 2921 * Reads information from file, stores in *info 2922 * @return : InfoError status 2923 */ 2924 static InfoError 2925 getFileInfo(fileInfo_t* info, const char* srcFileName) 2926 { 2927 ERROR_IF(!UTIL_isRegularFile(srcFileName), 2928 info_file_error, "Error : %s is not a file", srcFileName); 2929 return getFileInfo_fileConfirmed(info, srcFileName); 2930 } 2931 2932 2933 static void 2934 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) 2935 { 2936 unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB); 2937 const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB"; 2938 double const windowSizeUnit = (double)info->windowSize / unit; 2939 double const compressedSizeUnit = (double)info->compressedSize / unit; 2940 double const decompressedSizeUnit = (double)info->decompressedSize / unit; 2941 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize; 2942 const char* const checkString = (info->usesCheck ? 
"XXH64" : "None"); 2943 if (displayLevel <= 2) { 2944 if (!info->decompUnavailable) { 2945 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n", 2946 info->numSkippableFrames + info->numActualFrames, 2947 info->numSkippableFrames, 2948 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 2949 ratio, checkString, inFileName); 2950 } else { 2951 DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n", 2952 info->numSkippableFrames + info->numActualFrames, 2953 info->numSkippableFrames, 2954 compressedSizeUnit, unitStr, 2955 checkString, inFileName); 2956 } 2957 } else { 2958 DISPLAYOUT("%s \n", inFileName); 2959 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); 2960 if (info->numSkippableFrames) 2961 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); 2962 DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n", 2963 windowSizeUnit, unitStr, 2964 (unsigned long long)info->windowSize); 2965 DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n", 2966 compressedSizeUnit, unitStr, 2967 (unsigned long long)info->compressedSize); 2968 if (!info->decompUnavailable) { 2969 DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n", 2970 decompressedSizeUnit, unitStr, 2971 (unsigned long long)info->decompressedSize); 2972 DISPLAYOUT("Ratio: %.4f\n", ratio); 2973 } 2974 DISPLAYOUT("Check: %s\n", checkString); 2975 DISPLAYOUT("\n"); 2976 } 2977 } 2978 2979 static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) 2980 { 2981 fileInfo_t total; 2982 memset(&total, 0, sizeof(total)); 2983 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; 2984 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; 2985 total.compressedSize = fi1.compressedSize + fi2.compressedSize; 2986 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; 2987 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; 2988 total.usesCheck = fi1.usesCheck & fi2.usesCheck; 2989 total.nbFiles = fi1.nbFiles + fi2.nbFiles; 2990 return total; 2991 
} 2992 2993 static int 2994 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel) 2995 { 2996 fileInfo_t info; 2997 memset(&info, 0, sizeof(info)); 2998 { InfoError const error = getFileInfo(&info, inFileName); 2999 switch (error) { 3000 case info_frame_error: 3001 /* display error, but provide output */ 3002 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName); 3003 break; 3004 case info_not_zstd: 3005 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName); 3006 if (displayLevel > 2) DISPLAYOUT("\n"); 3007 return 1; 3008 case info_file_error: 3009 /* error occurred while opening the file */ 3010 if (displayLevel > 2) DISPLAYOUT("\n"); 3011 return 1; 3012 case info_truncated_input: 3013 DISPLAYOUT("File \"%s\" is truncated \n", inFileName); 3014 if (displayLevel > 2) DISPLAYOUT("\n"); 3015 return 1; 3016 case info_success: 3017 default: 3018 break; 3019 } 3020 3021 displayInfo(inFileName, &info, displayLevel); 3022 *total = FIO_addFInfo(*total, info); 3023 assert(error == info_success || error == info_frame_error); 3024 return (int)error; 3025 } 3026 } 3027 3028 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel) 3029 { 3030 /* ensure no specified input is stdin (needs fseek() capability) */ 3031 { unsigned u; 3032 for (u=0; u<numFiles;u++) { 3033 ERROR_IF(!strcmp (filenameTable[u], stdinmark), 3034 1, "zstd: --list does not support reading from standard input"); 3035 } } 3036 3037 if (numFiles == 0) { 3038 if (!IS_CONSOLE(stdin)) { 3039 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); 3040 } 3041 DISPLAYLEVEL(1, "No files given \n"); 3042 return 1; 3043 } 3044 3045 if (displayLevel <= 2) { 3046 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); 3047 } 3048 { int error = 0; 3049 fileInfo_t total; 3050 memset(&total, 0, sizeof(total)); 3051 total.usesCheck = 1; 3052 /* --list each file, and check for any error */ 3053 { unsigned u; 3054 
for (u=0; u<numFiles;u++) { 3055 error |= FIO_listFile(&total, filenameTable[u], displayLevel); 3056 } } 3057 if (numFiles > 1 && displayLevel <= 2) { /* display total */ 3058 unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB); 3059 const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB"; 3060 double const compressedSizeUnit = (double)total.compressedSize / unit; 3061 double const decompressedSizeUnit = (double)total.decompressedSize / unit; 3062 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize; 3063 const char* const checkString = (total.usesCheck ? "XXH64" : ""); 3064 DISPLAYOUT("----------------------------------------------------------------- \n"); 3065 if (total.decompUnavailable) { 3066 DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n", 3067 total.numSkippableFrames + total.numActualFrames, 3068 total.numSkippableFrames, 3069 compressedSizeUnit, unitStr, 3070 checkString, (unsigned)total.nbFiles); 3071 } else { 3072 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n", 3073 total.numSkippableFrames + total.numActualFrames, 3074 total.numSkippableFrames, 3075 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 3076 ratio, checkString, (unsigned)total.nbFiles); 3077 } } 3078 return error; 3079 } 3080 } 3081 3082 3083 #endif /* #ifndef ZSTD_NODECOMPRESS */ 3084