1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /* ************************************* 13 * Compiler Options 14 ***************************************/ 15 #ifdef _MSC_VER /* Visual */ 16 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 17 # pragma warning(disable : 4204) /* non-constant aggregate initializer */ 18 #endif 19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE) 20 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ 21 #endif 22 23 /*-************************************* 24 * Includes 25 ***************************************/ 26 #include "platform.h" /* Large Files support, SET_BINARY_MODE */ 27 #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ 28 #include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */ 29 #include <stdlib.h> /* malloc, free */ 30 #include <string.h> /* strcmp, strlen */ 31 #include <time.h> /* clock_t, to measure process time */ 32 #include <fcntl.h> /* O_WRONLY */ 33 #include <assert.h> 34 #include <errno.h> /* errno */ 35 #include <limits.h> /* INT_MAX */ 36 #include <signal.h> 37 #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ 38 39 #if defined (_MSC_VER) 40 # include <sys/stat.h> 41 # include <io.h> 42 #endif 43 44 #include "fileio.h" 45 #include "fileio_asyncio.h" 46 #include "fileio_common.h" 47 48 FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto}; 49 UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 50 51 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ 52 #include "../lib/zstd.h" 53 #include 
"../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ 54 55 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) 56 # include <zlib.h> 57 # if !defined(z_const) 58 # define z_const 59 # endif 60 #endif 61 62 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) 63 # include <lzma.h> 64 #endif 65 66 #define LZ4_MAGICNUMBER 0x184D2204 67 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) 68 # define LZ4F_ENABLE_OBSOLETE_ENUMS 69 # include <lz4frame.h> 70 # include <lz4.h> 71 #endif 72 73 char const* FIO_zlibVersion(void) 74 { 75 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) 76 return zlibVersion(); 77 #else 78 return "Unsupported"; 79 #endif 80 } 81 82 char const* FIO_lz4Version(void) 83 { 84 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) 85 /* LZ4_versionString() added in v1.7.3 */ 86 # if LZ4_VERSION_NUMBER >= 10703 87 return LZ4_versionString(); 88 # else 89 # define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE 90 # define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION) 91 return ZSTD_LZ4_VERSION_STRING; 92 # endif 93 #else 94 return "Unsupported"; 95 #endif 96 } 97 98 char const* FIO_lzmaVersion(void) 99 { 100 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) 101 return lzma_version_string(); 102 #else 103 return "Unsupported"; 104 #endif 105 } 106 107 108 /*-************************************* 109 * Constants 110 ***************************************/ 111 #define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */ 112 #define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ 113 114 #define FNSPACE 30 115 116 /* Default file permissions 0666 (modulated by umask) */ 117 /* Temporary restricted file permissions are used when we're going to 118 * chmod/chown at the end of the operation. */ 119 #if !defined(_WIN32) 120 /* These macros aren't defined on windows. 
*/ 121 #define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) 122 #define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR) 123 #else 124 #define DEFAULT_FILE_PERMISSIONS (0666) 125 #define TEMPORARY_FILE_PERMISSIONS (0600) 126 #endif 127 128 /*-************************************ 129 * Signal (Ctrl-C trapping) 130 **************************************/ 131 static const char* g_artefact = NULL; 132 static void INThandler(int sig) 133 { 134 assert(sig==SIGINT); (void)sig; 135 #if !defined(_MSC_VER) 136 signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ 137 #endif 138 if (g_artefact) { 139 assert(UTIL_isRegularFile(g_artefact)); 140 remove(g_artefact); 141 } 142 DISPLAY("\n"); 143 exit(2); 144 } 145 static void addHandler(char const* dstFileName) 146 { 147 if (UTIL_isRegularFile(dstFileName)) { 148 g_artefact = dstFileName; 149 signal(SIGINT, INThandler); 150 } else { 151 g_artefact = NULL; 152 } 153 } 154 /* Idempotent */ 155 static void clearHandler(void) 156 { 157 if (g_artefact) signal(SIGINT, SIG_DFL); 158 g_artefact = NULL; 159 } 160 161 162 /*-********************************************************* 163 * Termination signal trapping (Print debug stack trace) 164 ***********************************************************/ 165 #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */ 166 # if (__has_feature(address_sanitizer)) 167 # define BACKTRACE_ENABLE 0 168 # endif /* __has_feature(address_sanitizer) */ 169 #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */ 170 # define BACKTRACE_ENABLE 0 171 #endif 172 173 #if !defined(BACKTRACE_ENABLE) 174 /* automatic detector : backtrace enabled by default on linux+glibc and osx */ 175 # if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \ 176 || (defined(__APPLE__) && defined(__MACH__)) 177 # define BACKTRACE_ENABLE 1 178 # else 179 # define BACKTRACE_ENABLE 0 180 # endif 181 #endif 
182 183 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */ 184 185 186 #if BACKTRACE_ENABLE 187 188 #include <execinfo.h> /* backtrace, backtrace_symbols */ 189 190 #define MAX_STACK_FRAMES 50 191 192 static void ABRThandler(int sig) { 193 const char* name; 194 void* addrlist[MAX_STACK_FRAMES]; 195 char** symbollist; 196 int addrlen, i; 197 198 switch (sig) { 199 case SIGABRT: name = "SIGABRT"; break; 200 case SIGFPE: name = "SIGFPE"; break; 201 case SIGILL: name = "SIGILL"; break; 202 case SIGINT: name = "SIGINT"; break; 203 case SIGSEGV: name = "SIGSEGV"; break; 204 default: name = "UNKNOWN"; 205 } 206 207 DISPLAY("Caught %s signal, printing stack:\n", name); 208 /* Retrieve current stack addresses. */ 209 addrlen = backtrace(addrlist, MAX_STACK_FRAMES); 210 if (addrlen == 0) { 211 DISPLAY("\n"); 212 return; 213 } 214 /* Create readable strings to each frame. */ 215 symbollist = backtrace_symbols(addrlist, addrlen); 216 /* Print the stack trace, excluding calls handling the signal. */ 217 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) { 218 DISPLAY("%s\n", symbollist[i]); 219 } 220 free(symbollist); 221 /* Reset and raise the signal so default handler runs. 
*/ 222 signal(sig, SIG_DFL); 223 raise(sig); 224 } 225 #endif 226 227 void FIO_addAbortHandler(void) 228 { 229 #if BACKTRACE_ENABLE 230 signal(SIGABRT, ABRThandler); 231 signal(SIGFPE, ABRThandler); 232 signal(SIGILL, ABRThandler); 233 signal(SIGSEGV, ABRThandler); 234 signal(SIGBUS, ABRThandler); 235 #endif 236 } 237 238 /*-************************************* 239 * Parameters: FIO_ctx_t 240 ***************************************/ 241 242 /* typedef'd to FIO_ctx_t within fileio.h */ 243 struct FIO_ctx_s { 244 245 /* file i/o info */ 246 int nbFilesTotal; 247 int hasStdinInput; 248 int hasStdoutOutput; 249 250 /* file i/o state */ 251 int currFileIdx; 252 int nbFilesProcessed; 253 size_t totalBytesInput; 254 size_t totalBytesOutput; 255 }; 256 257 static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx) 258 { 259 return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3; 260 } 261 262 static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx) 263 { 264 int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1); 265 assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0); 266 return shouldDisplay; 267 } 268 269 270 /*-************************************* 271 * Parameters: Initialization 272 ***************************************/ 273 274 #define FIO_OVERLAP_LOG_NOTSET 9999 275 #define FIO_LDM_PARAM_NOTSET 9999 276 277 278 FIO_prefs_t* FIO_createPreferences(void) 279 { 280 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t)); 281 if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); 282 283 ret->compressionType = FIO_zstdCompression; 284 ret->overwrite = 0; 285 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 286 ret->dictIDFlag = 1; 287 ret->checksumFlag = 1; 288 ret->removeSrcFile = 0; 289 ret->memLimit = 0; 290 ret->nbWorkers = 1; 291 ret->blockSize = 0; 292 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET; 293 ret->adaptiveMode = 0; 294 ret->rsyncable = 0; 295 
ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ 296 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */ 297 ret->ldmFlag = 0; 298 ret->ldmHashLog = 0; 299 ret->ldmMinMatch = 0; 300 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; 301 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; 302 ret->streamSrcSize = 0; 303 ret->targetCBlockSize = 0; 304 ret->srcSizeHint = 0; 305 ret->testMode = 0; 306 ret->literalCompressionMode = ZSTD_ps_auto; 307 ret->excludeCompressedFiles = 0; 308 ret->allowBlockDevices = 0; 309 ret->asyncIO = AIO_supported(); 310 ret->passThrough = -1; 311 return ret; 312 } 313 314 FIO_ctx_t* FIO_createContext(void) 315 { 316 FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t)); 317 if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); 318 319 ret->currFileIdx = 0; 320 ret->hasStdinInput = 0; 321 ret->hasStdoutOutput = 0; 322 ret->nbFilesTotal = 1; 323 ret->nbFilesProcessed = 0; 324 ret->totalBytesInput = 0; 325 ret->totalBytesOutput = 0; 326 return ret; 327 } 328 329 void FIO_freePreferences(FIO_prefs_t* const prefs) 330 { 331 free(prefs); 332 } 333 334 void FIO_freeContext(FIO_ctx_t* const fCtx) 335 { 336 free(fCtx); 337 } 338 339 340 /*-************************************* 341 * Parameters: Display Options 342 ***************************************/ 343 344 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; } 345 346 void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; } 347 348 349 /*-************************************* 350 * Parameters: Setters 351 ***************************************/ 352 353 /* FIO_prefs_t functions */ 354 355 void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } 356 357 void FIO_overwriteMode(FIO_prefs_t* const prefs) { 
prefs->overwrite = 1; } 358 359 void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; } 360 361 void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } 362 363 void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } 364 365 void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); } 366 367 void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } 368 369 void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { 370 #ifndef ZSTD_MULTITHREAD 371 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); 372 #endif 373 prefs->nbWorkers = nbWorkers; 374 } 375 376 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } 377 378 void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; } 379 380 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { 381 if (blockSize && prefs->nbWorkers==0) 382 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); 383 prefs->blockSize = blockSize; 384 } 385 386 void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ 387 if (overlapLog && prefs->nbWorkers==0) 388 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); 389 prefs->overlapLog = overlapLog; 390 } 391 392 void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) { 393 if ((adapt>0) && (prefs->nbWorkers==0)) 394 EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); 395 prefs->adaptiveMode = adapt; 396 } 397 398 void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) { 399 prefs->useRowMatchFinder = useRowMatchFinder; 400 } 401 402 void FIO_setRsyncable(FIO_prefs_t* 
const prefs, int rsyncable) { 403 if ((rsyncable>0) && (prefs->nbWorkers==0)) 404 EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); 405 prefs->rsyncable = rsyncable; 406 } 407 408 void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { 409 prefs->streamSrcSize = streamSrcSize; 410 } 411 412 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { 413 prefs->targetCBlockSize = targetCBlockSize; 414 } 415 416 void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) { 417 prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint); 418 } 419 420 void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) { 421 prefs->testMode = (testMode!=0); 422 } 423 424 void FIO_setLiteralCompressionMode( 425 FIO_prefs_t* const prefs, 426 ZSTD_ParamSwitch_e mode) { 427 prefs->literalCompressionMode = mode; 428 } 429 430 void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) 431 { 432 #ifndef ZSTD_NOCOMPRESS 433 assert(minCLevel >= ZSTD_minCLevel()); 434 #endif 435 prefs->minAdaptLevel = minCLevel; 436 } 437 438 void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) 439 { 440 prefs->maxAdaptLevel = maxCLevel; 441 } 442 443 void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) { 444 prefs->ldmFlag = (ldmFlag>0); 445 } 446 447 void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) { 448 prefs->ldmHashLog = ldmHashLog; 449 } 450 451 void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) { 452 prefs->ldmMinMatch = ldmMinMatch; 453 } 454 455 void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) { 456 prefs->ldmBucketSizeLog = ldmBucketSizeLog; 457 } 458 459 460 void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) { 461 prefs->ldmHashRateLog = ldmHashRateLog; 462 } 463 464 void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value) 465 { 466 prefs->patchFromMode = value != 0; 467 } 468 469 void 
FIO_setContentSize(FIO_prefs_t* const prefs, int value) 470 { 471 prefs->contentSize = value != 0; 472 } 473 474 void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) { 475 #ifdef ZSTD_MULTITHREAD 476 prefs->asyncIO = value; 477 #else 478 (void) prefs; 479 (void) value; 480 DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n"); 481 #endif 482 } 483 484 void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) { 485 prefs->passThrough = (value != 0); 486 } 487 488 void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_ParamSwitch_e value) 489 { 490 prefs->mmapDict = value; 491 } 492 493 /* FIO_ctx_t functions */ 494 495 void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { 496 fCtx->hasStdoutOutput = value; 497 } 498 499 void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value) 500 { 501 fCtx->nbFilesTotal = value; 502 } 503 504 void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) { 505 size_t i = 0; 506 for ( ; i < filenames->tableSize; ++i) { 507 if (!strcmp(stdinmark, filenames->fileNames[i])) { 508 fCtx->hasStdinInput = 1; 509 return; 510 } 511 } 512 } 513 514 /*-************************************* 515 * Functions 516 ***************************************/ 517 /** FIO_removeFile() : 518 * @result : Unlink `fileName`, even if it's read-only */ 519 static int FIO_removeFile(const char* path) 520 { 521 stat_t statbuf; 522 if (!UTIL_stat(path, &statbuf)) { 523 DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path); 524 return 0; 525 } 526 if (!UTIL_isRegularFileStat(&statbuf)) { 527 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); 528 return 0; 529 } 530 #if defined(_WIN32) 531 /* windows doesn't allow remove read-only files, 532 * so try to make it writable first */ 533 if (!(statbuf.st_mode & _S_IWRITE)) { 534 UTIL_chmod(path, &statbuf, _S_IWRITE); 535 } 536 #endif 537 return remove(path); 538 } 539 540 /** FIO_openSrcFile() 
: 541 * condition : `srcFileName` must be non-NULL. `prefs` may be NULL. 542 * @result : FILE* to `srcFileName`, or NULL if it fails */ 543 static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf) 544 { 545 int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0; 546 assert(srcFileName != NULL); 547 assert(statbuf != NULL); 548 if (!strcmp (srcFileName, stdinmark)) { 549 DISPLAYLEVEL(4,"Using stdin for input \n"); 550 SET_BINARY_MODE(stdin); 551 return stdin; 552 } 553 554 if (!UTIL_stat(srcFileName, statbuf)) { 555 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", 556 srcFileName, strerror(errno)); 557 return NULL; 558 } 559 560 if (!UTIL_isRegularFileStat(statbuf) 561 && !UTIL_isFIFOStat(statbuf) 562 && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf)) 563 ) { 564 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", 565 srcFileName); 566 return NULL; 567 } 568 569 { FILE* const f = fopen(srcFileName, "rb"); 570 if (f == NULL) 571 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 572 return f; 573 } 574 } 575 576 /** FIO_openDstFile() : 577 * condition : `dstFileName` must be non-NULL. 
578 * @result : FILE* to `dstFileName`, or NULL if it fails */ 579 static FILE* 580 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, 581 const char* srcFileName, const char* dstFileName, 582 const int mode) 583 { 584 int isDstRegFile; 585 586 if (prefs->testMode) return NULL; /* do not open file in test mode */ 587 588 assert(dstFileName != NULL); 589 if (!strcmp (dstFileName, stdoutmark)) { 590 DISPLAYLEVEL(4,"Using stdout for output \n"); 591 SET_BINARY_MODE(stdout); 592 if (prefs->sparseFileSupport == 1) { 593 prefs->sparseFileSupport = 0; 594 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); 595 } 596 return stdout; 597 } 598 599 /* ensure dst is not the same as src */ 600 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { 601 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); 602 return NULL; 603 } 604 605 isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */ 606 if (prefs->sparseFileSupport == 1) { 607 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 608 if (!isDstRegFile) { 609 prefs->sparseFileSupport = 0; 610 DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n"); 611 } 612 } 613 614 if (isDstRegFile) { 615 /* Check if destination file already exists */ 616 #if !defined(_WIN32) 617 /* this test does not work on Windows : 618 * `NUL` and `nul` are detected as regular files */ 619 if (!strcmp(dstFileName, nulmark)) { 620 EXM_THROW(40, "%s is unexpectedly categorized as a regular file", 621 dstFileName); 622 } 623 #endif 624 if (!prefs->overwrite) { 625 if (g_display_prefs.displayLevel <= 1) { 626 /* No interaction possible */ 627 DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n", 628 dstFileName); 629 return NULL; 630 } 631 DISPLAY("zstd: %s already exists; ", dstFileName); 632 if (UTIL_requireUserConfirmation("overwrite (y/n) ? 
", "Not overwritten \n", "yY", fCtx->hasStdinInput)) 633 return NULL; 634 } 635 /* need to unlink */ 636 FIO_removeFile(dstFileName); 637 } 638 639 { 640 #if defined(_WIN32) 641 /* Windows requires opening the file as a "binary" file to avoid 642 * mangling. This macro doesn't exist on unix. */ 643 const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY; 644 const int fd = _open(dstFileName, openflags, mode); 645 FILE* f = NULL; 646 if (fd != -1) { 647 f = _fdopen(fd, "wb"); 648 } 649 #else 650 const int openflags = O_WRONLY|O_CREAT|O_TRUNC; 651 const int fd = open(dstFileName, openflags, mode); 652 FILE* f = NULL; 653 if (fd != -1) { 654 f = fdopen(fd, "wb"); 655 } 656 #endif 657 if (f == NULL) { 658 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); 659 } else { 660 /* An increased buffer size can provide a significant performance 661 * boost on some platforms. Note that providing a NULL buf with a 662 * size that's not 0 is not defined in ANSI C, but is defined in an 663 * extension. There are three possibilities here: 664 * 1. Libc supports the extended version and everything is good. 665 * 2. Libc ignores the size when buf is NULL, in which case 666 * everything will continue as if we didn't call `setvbuf()`. 667 * 3. We fail the call and execution continues but a warning 668 * message might be shown. 669 * In all cases due execution continues. For now, I believe that 670 * this is a more cost-effective solution than managing the buffers 671 * allocations ourselves (will require an API change). 
672 */ 673 if (setvbuf(f, NULL, _IOFBF, 1 MB)) { 674 DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName); 675 } 676 } 677 return f; 678 } 679 } 680 681 682 /* FIO_getDictFileStat() : 683 */ 684 static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) { 685 assert(dictFileStat != NULL); 686 if (fileName == NULL) return; 687 688 if (!UTIL_stat(fileName, dictFileStat)) { 689 EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno)); 690 } 691 692 if (!UTIL_isRegularFileStat(dictFileStat)) { 693 EXM_THROW(32, "Dictionary %s must be a regular file.", fileName); 694 } 695 } 696 697 /* FIO_setDictBufferMalloc() : 698 * allocates a buffer, pointed by `dict->dictBuffer`, 699 * loads `filename` content into it, up to DICTSIZE_MAX bytes. 700 * @return : loaded size 701 * if fileName==NULL, returns 0 and a NULL pointer 702 */ 703 static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) 704 { 705 FILE* fileHandle; 706 U64 fileSize; 707 void** bufferPtr = &dict->dictBuffer; 708 709 assert(bufferPtr != NULL); 710 assert(dictFileStat != NULL); 711 *bufferPtr = NULL; 712 if (fileName == NULL) return 0; 713 714 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 715 716 fileHandle = fopen(fileName, "rb"); 717 718 if (fileHandle == NULL) { 719 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); 720 } 721 722 fileSize = UTIL_getFileSizeStat(dictFileStat); 723 { 724 size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; 725 if (fileSize > dictSizeMax) { 726 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", 727 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ 728 } 729 } 730 *bufferPtr = malloc((size_t)fileSize); 731 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); 732 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); 733 if (readSize != fileSize) { 734 EXM_THROW(35, "Error reading dictionary file %s : %s", 735 fileName, strerror(errno)); 736 } 737 } 738 fclose(fileHandle); 739 return (size_t)fileSize; 740 } 741 742 #if (PLATFORM_POSIX_VERSION > 0) 743 #include <sys/mman.h> 744 static void FIO_munmap(FIO_Dict_t* dict) 745 { 746 munmap(dict->dictBuffer, dict->dictBufferSize); 747 dict->dictBuffer = NULL; 748 dict->dictBufferSize = 0; 749 } 750 static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) 751 { 752 int fileHandle; 753 U64 fileSize; 754 void** bufferPtr = &dict->dictBuffer; 755 756 assert(bufferPtr != NULL); 757 assert(dictFileStat != NULL); 758 *bufferPtr = NULL; 759 if (fileName == NULL) return 0; 760 761 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 762 763 fileHandle = open(fileName, O_RDONLY); 764 765 if (fileHandle == -1) { 766 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); 767 } 768 769 fileSize = UTIL_getFileSizeStat(dictFileStat); 770 { 771 size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; 772 if (fileSize > dictSizeMax) { 773 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", 774 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ 775 } 776 } 777 778 *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0); 779 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); 780 781 close(fileHandle); 782 return (size_t)fileSize; 783 } 784 #elif defined(_MSC_VER) || defined(_WIN32) 785 #include <windows.h> 786 static void FIO_munmap(FIO_Dict_t* dict) 787 { 788 UnmapViewOfFile(dict->dictBuffer); 789 CloseHandle(dict->dictHandle); 790 dict->dictBuffer = NULL; 791 dict->dictBufferSize = 0; 792 } 793 static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) 794 { 795 HANDLE fileHandle, mapping; 796 U64 fileSize; 797 void** bufferPtr = &dict->dictBuffer; 798 799 assert(bufferPtr != NULL); 800 assert(dictFileStat != NULL); 801 *bufferPtr = NULL; 802 if (fileName == NULL) return 0; 803 804 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 805 806 fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); 807 808 if (fileHandle == INVALID_HANDLE_VALUE) { 809 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); 810 } 811 812 fileSize = UTIL_getFileSizeStat(dictFileStat); 813 { 814 size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; 815 if (fileSize > dictSizeMax) { 816 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", 817 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ 818 } 819 } 820 821 mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL); 822 if (mapping == NULL) { 823 EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno)); 824 } 825 826 *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */ 827 if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno)); 828 829 dict->dictHandle = fileHandle; 830 return (size_t)fileSize; 831 } 832 #else 833 static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) 834 { 835 return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat); 836 } 837 static void FIO_munmap(FIO_Dict_t* dict) { 838 free(dict->dictBuffer); 839 dict->dictBuffer = NULL; 840 dict->dictBufferSize = 0; 841 } 842 #endif 843 844 static void FIO_freeDict(FIO_Dict_t* dict) { 845 if (dict->dictBufferType == FIO_mallocDict) { 846 free(dict->dictBuffer); 847 dict->dictBuffer = NULL; 848 dict->dictBufferSize = 0; 849 } else if (dict->dictBufferType == FIO_mmapDict) { 850 FIO_munmap(dict); 851 } else { 852 assert(0); /* Should not reach this case */ 853 } 854 } 855 856 static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) { 857 dict->dictBufferType = dictBufferType; 858 if (dict->dictBufferType == FIO_mallocDict) { 859 dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat); 860 } else if (dict->dictBufferType == FIO_mmapDict) { 861 dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat); 862 } else { 863 assert(0); /* Should not reach this case */ 864 } 865 } 866 867 868 /* 
FIO_checkFilenameCollisions() : 869 * Checks for and warns if there are any files that would have the same output path 870 */ 871 int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { 872 const char **filenameTableSorted, *prevElem, *filename; 873 unsigned u; 874 875 filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); 876 if (!filenameTableSorted) { 877 DISPLAYLEVEL(1, "Allocation error during filename collision checking \n"); 878 return 1; 879 } 880 881 for (u = 0; u < nbFiles; ++u) { 882 filename = strrchr(filenameTable[u], PATH_SEP); 883 if (filename == NULL) { 884 filenameTableSorted[u] = filenameTable[u]; 885 } else { 886 filenameTableSorted[u] = filename+1; 887 } 888 } 889 890 qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr); 891 prevElem = filenameTableSorted[0]; 892 for (u = 1; u < nbFiles; ++u) { 893 if (strcmp(prevElem, filenameTableSorted[u]) == 0) { 894 DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem); 895 } 896 prevElem = filenameTableSorted[u]; 897 } 898 899 free((void*)filenameTableSorted); 900 return 0; 901 } 902 903 static const char* 904 extractFilename(const char* path, char separator) 905 { 906 const char* search = strrchr(path, separator); 907 if (search == NULL) return path; 908 return search+1; 909 } 910 911 /* FIO_createFilename_fromOutDir() : 912 * Takes a source file name and specified output directory, and 913 * allocates memory for and returns a pointer to final path. 
914 * This function never returns an error (it may abort() in case of pb) 915 */ 916 static char* 917 FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen) 918 { 919 const char* filenameStart; 920 char separator; 921 char* result; 922 923 #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ 924 separator = '\\'; 925 #else 926 separator = '/'; 927 #endif 928 929 filenameStart = extractFilename(path, separator); 930 #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ 931 filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */ 932 #endif 933 934 result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1); 935 if (!result) { 936 EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno)); 937 } 938 939 memcpy(result, outDirName, strlen(outDirName)); 940 if (outDirName[strlen(outDirName)-1] == separator) { 941 memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart)); 942 } else { 943 memcpy(result + strlen(outDirName), &separator, 1); 944 memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart)); 945 } 946 947 return result; 948 } 949 950 /* FIO_highbit64() : 951 * gives position of highest bit. 952 * note : only works for v > 0 ! 
953 */ 954 static unsigned FIO_highbit64(unsigned long long v) 955 { 956 unsigned count = 0; 957 assert(v != 0); 958 v >>= 1; 959 while (v) { v >>= 1; count++; } 960 return count; 961 } 962 963 static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, 964 unsigned long long const dictSize, 965 unsigned long long const maxSrcFileSize) 966 { 967 unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize)); 968 unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX); 969 if (maxSize == UTIL_FILESIZE_UNKNOWN) 970 EXM_THROW(42, "Using --patch-from with stdin requires --stream-size"); 971 assert(maxSize != UTIL_FILESIZE_UNKNOWN); 972 if (maxSize > maxWindowSize) 973 EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB)); 974 FIO_setMemLimit(prefs, (unsigned)maxSize); 975 } 976 977 /* FIO_multiFilesConcatWarning() : 978 * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts. 979 * Returns 1 if the console should abort, 0 if console should proceed. 980 * 981 * If output is stdout or test mode is active, check that `--rm` disabled. 982 * 983 * If there is just 1 file to process, zstd will proceed as usual. 984 * If each file get processed into its own separate destination file, proceed as usual. 985 * 986 * When multiple files are processed into a single output, 987 * display a warning message, then disable --rm if it's set. 988 * 989 * If -f is specified or if output is stdout, just proceed. 990 * If output is set with -o, prompt for confirmation. 
991 */ 992 static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff) 993 { 994 if (fCtx->hasStdoutOutput) { 995 if (prefs->removeSrcFile) 996 /* this should not happen ; hard fail, to protect user's data 997 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ 998 EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. " 999 "This scenario is not supposed to be possible. " 1000 "This is a programming error. File an issue for it to be fixed."); 1001 } 1002 if (prefs->testMode) { 1003 if (prefs->removeSrcFile) 1004 /* this should not happen ; hard fail, to protect user's data 1005 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ 1006 EXM_THROW(43, "Test mode shall not remove input files! " 1007 "This scenario is not supposed to be possible. " 1008 "This is a programming error. File an issue for it to be fixed."); 1009 return 0; 1010 } 1011 1012 if (fCtx->nbFilesTotal == 1) return 0; 1013 assert(fCtx->nbFilesTotal > 1); 1014 1015 if (!outFileName) return 0; 1016 1017 if (fCtx->hasStdoutOutput) { 1018 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n"); 1019 } else { 1020 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName); 1021 } 1022 DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n") 1023 1024 /* multi-input into single output : --rm is not allowed */ 1025 if (prefs->removeSrcFile) { 1026 DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. 
\n"); 1027 prefs->removeSrcFile = 0; 1028 } 1029 1030 if (fCtx->hasStdoutOutput) return 0; 1031 if (prefs->overwrite) return 0; 1032 1033 /* multiple files concatenated into single destination file using -o without -f */ 1034 if (g_display_prefs.displayLevel <= displayLevelCutoff) { 1035 /* quiet mode => no prompt => fail automatically */ 1036 DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n"); 1037 DISPLAYLEVEL(1, "Aborting. \n"); 1038 return 1; 1039 } 1040 /* normal mode => prompt */ 1041 return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); 1042 } 1043 1044 static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos) 1045 { 1046 ZSTD_inBuffer i; 1047 i.src = buf; 1048 i.size = s; 1049 i.pos = pos; 1050 return i; 1051 } 1052 1053 static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos) 1054 { 1055 ZSTD_outBuffer o; 1056 o.dst = buf; 1057 o.size = s; 1058 o.pos = pos; 1059 return o; 1060 } 1061 1062 #ifndef ZSTD_NOCOMPRESS 1063 1064 /* ********************************************************************** 1065 * Compression 1066 ************************************************************************/ 1067 typedef struct { 1068 FIO_Dict_t dict; 1069 const char* dictFileName; 1070 stat_t dictFileStat; 1071 ZSTD_CStream* cctx; 1072 WritePoolCtx_t *writeCtx; 1073 ReadPoolCtx_t *readCtx; 1074 } cRess_t; 1075 1076 /** ZSTD_cycleLog() : 1077 * condition for correct operation : hashLog > 1 */ 1078 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) 1079 { 1080 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); 1081 assert(hashLog > 1); 1082 return hashLog - btScale; 1083 } 1084 1085 static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, 1086 ZSTD_compressionParameters* comprParams, 1087 unsigned long long const dictSize, 1088 unsigned long long const maxSrcFileSize, 1089 int cLevel) 1090 { 1091 unsigned const fileWindowLog = 
FIO_highbit64(maxSrcFileSize) + 1; 1092 ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize); 1093 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize); 1094 if (fileWindowLog > ZSTD_WINDOWLOG_MAX) 1095 DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n"); 1096 comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog)); 1097 if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) { 1098 if (!prefs->ldmFlag) 1099 DISPLAYLEVEL(2, "long mode automatically triggered\n"); 1100 FIO_setLdmFlag(prefs, 1); 1101 } 1102 if (cParams.strategy >= ZSTD_btopt) { 1103 DISPLAYLEVEL(4, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); 1104 DISPLAYLEVEL(4, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n"); 1105 DISPLAYLEVEL(4, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); 1106 DISPLAYLEVEL(4, "- Set a larger LDM hashLog (e.g. --zstd=ldmHashLog=%u)\n", ZSTD_LDM_HASHLOG_MAX); 1107 DISPLAYLEVEL(4, "- Set a smaller LDM rateLog (e.g. 
/* FIO_createCResources() :
 * Builds the set of resources used to compress files :
 * a ZSTD_CCtx, the dictionary (if any), and the read / write job pools.
 * All compression parameters taken from `prefs`, `cLevel` and `comprParams`
 * are applied to the context before returning.
 * In patch-from mode, the memory limit and `comprParams` are adjusted first,
 * using the declared stream size when it is > 0, `maxSrcFileSize` otherwise.
 * Failures are reported through EXM_THROW / CHECK.
 * @return : the initialized cRess_t, by value; release with FIO_freeCResources().
 */
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
                                    const char* dictFileName, unsigned long long const maxSrcFileSize,
                                    int cLevel, ZSTD_compressionParameters comprParams) {
    int useMMap = prefs->mmapDict == ZSTD_ps_enable;
    int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
    FIO_dictBufferType_t dictBufferType;
    cRess_t ress;
    memset(&ress, 0, sizeof(ress));

    DISPLAYLEVEL(6, "FIO_createCResources \n");
    ress.cctx = ZSTD_createCCtx();
    if (ress.cctx == NULL)
        EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
                    strerror(errno));

    FIO_getDictFileStat(dictFileName, &ress.dictFileStat);

    /* need to update memLimit before calling createDictBuffer
     * because of memLimit check inside it */
    if (prefs->patchFromMode) {
        U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
        unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
        /* a dictionary larger than the current memory limit is mapped rather than loaded */
        useMMap |= dictSize > prefs->memLimit;
        FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
    }

    /* an explicit "disable" preference wins over any reason to mmap */
    dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
    FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType);   /* works with dictFileName==NULL */

    ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
    ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());

    /* Advanced parameters, including dictionary */
    if (dictFileName && (ress.dict.dictBuffer==NULL))
        EXM_THROW(32, "allocation error : can't create dictBuffer");
    ress.dictFileName = dictFileName;

    /* adaptive mode without long mode nor explicit window : use the adaptive default */
    if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
        comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;

    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) );  /* always enable content size when available (note: supposed to be default) */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
    /* compression level */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
    /* max compressed block size */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
    /* source size hint */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
    /* long distance matching */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
    /* the next two are only forwarded when the user set them */
    if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
    }
    if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
    /* compression parameters */
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
    /* multi-threading */
#ifdef ZSTD_MULTITHREAD
    DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
    if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
        DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
    }
    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
    /* dictionary */
    /* patch-from : the reference file is used as a prefix; otherwise as a regular dictionary.
     * Both calls reference ress.dict.dictBuffer rather than copying it,
     * so the buffer must stay valid while the context uses it. */
    if (prefs->patchFromMode) {
        CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
    } else {
        CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
    }

    return ress;
}

/* FIO_freeCResources() :
 * Releases everything held by `ress` : dictionary, write pool, read pool,
 * and the compression context. The cRess_t object itself is not freed. */
static void FIO_freeCResources(cRess_t* const ress)
{
    FIO_freeDict(&(ress->dict));
    AIO_WritePool_free(ress->writeCtx);
    AIO_ReadPool_free(ress->readCtx);
    ZSTD_freeCStream(ress->cctx);   /* never fails */
}
#ifdef ZSTD_GZCOMPRESS
/* FIO_compressGzFrame() :
 * Compresses the whole source held by ress->readCtx as one gzip stream
 * (zlib deflate, window bits 15 + 16 for the gzip wrapper), and sends the
 * result to ress->writeCtx.
 * `compressionLevel` is capped at Z_BEST_COMPRESSION.
 * `srcFileName` and `srcFileSize` are only used for messages and progress.
 * `*readsize` receives the number of source bytes read.
 * Any zlib failure ends in EXM_THROW.
 * @return : number of compressed bytes produced.
 */
static unsigned long long
FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but not changed */
                    const char* srcFileName, U64 const srcFileSize,
                    int compressionLevel, U64* readsize)
{
    unsigned long long inFileSize = 0, outFileSize = 0;
    z_stream strm;
    IOJob_t *writeJob = NULL;

    if (compressionLevel > Z_BEST_COMPRESSION)
        compressionLevel = Z_BEST_COMPRESSION;

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;

    {   int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
                        15 /* maxWindowLogSize */ + 16 /* gzip only */,
                        8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */
        if (ret != Z_OK) {
            EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
    }   }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_in = 0;
    strm.avail_in = 0;
    strm.next_out = (Bytef*)writeJob->buffer;
    strm.avail_out = (uInt)writeJob->bufferSize;

    /* Main loop : feed the source until the read pool has nothing left */
    while (1) {
        int ret;
        if (strm.avail_in == 0) {
            AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
            if (ress->readCtx->srcBufferLoaded == 0) break;
            inFileSize += ress->readCtx->srcBufferLoaded;
            strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
            strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
        }

        {   /* tell the read pool how many bytes deflate() took */
            size_t const availBefore = strm.avail_in;
            ret = deflate(&strm, Z_NO_FLUSH);
            AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
        }

        if (ret != Z_OK)
            EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
        {   size_t const cSize = writeJob->bufferSize - strm.avail_out;
            if (cSize) {
                writeJob->usedBufferSize = cSize;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += cSize;
                strm.next_out = (Bytef*)writeJob->buffer;
                strm.avail_out = (uInt)writeJob->bufferSize;
        }   }
        /* inFileSize > 0 here : the loop is left before any display when nothing was read */
        if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
            DISPLAYUPDATE_PROGRESS(
                    "\rRead : %u MB ==> %.2f%% ",
                    (unsigned)(inFileSize>>20),
                    (double)outFileSize/(double)inFileSize*100);
        } else {
            DISPLAYUPDATE_PROGRESS(
                    "\rRead : %u / %u MB ==> %.2f%% ",
                    (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
                    (double)outFileSize/(double)inFileSize*100);
    }   }

    /* Finish : drain what zlib still holds, then write the gzip trailer */
    while (1) {
        int const ret = deflate(&strm, Z_FINISH);
        {   size_t const cSize = writeJob->bufferSize - strm.avail_out;
            if (cSize) {
                writeJob->usedBufferSize = cSize;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += cSize;
                strm.next_out = (Bytef*)writeJob->buffer;
                strm.avail_out = (uInt)writeJob->bufferSize;
        }   }
        if (ret == Z_STREAM_END) break;
        /* with Z_FINISH, both Z_OK and Z_BUF_ERROR mean "call again with more
         * output space" ; only other codes are real errors */
        if (ret != Z_OK && ret != Z_BUF_ERROR)
            EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
    }

    {   int const ret = deflateEnd(&strm);
        if (ret != Z_OK) {
            EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
    }   }
    *readsize = inFileSize;
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return outFileSize;
}
#endif
#ifdef ZSTD_LZMACOMPRESS
/* FIO_compressLzmaFrame() :
 * Compresses the whole source held by ress->readCtx with liblzma and sends
 * the result to ress->writeCtx.
 * `plain_lzma` != 0 selects the lzma_alone encoder (LZMA),
 * otherwise the easy encoder with a CRC64 check is used (XZ).
 * `compressionLevel` is clamped to [0, 9].
 * `srcFileName` and `srcFileSize` are only used for messages and progress.
 * `*readsize` receives the number of source bytes read.
 * Any liblzma failure ends in EXM_THROW.
 * @return : number of compressed bytes produced.
 */
static unsigned long long
FIO_compressLzmaFrame(cRess_t* ress,
                      const char* srcFileName, U64 const srcFileSize,
                      int compressionLevel, U64* readsize, int plain_lzma)
{
    unsigned long long inFileSize = 0, outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret ret;
    IOJob_t *writeJob = NULL;

    if (compressionLevel < 0) compressionLevel = 0;
    if (compressionLevel > 9) compressionLevel = 9;

    if (plain_lzma) {
        lzma_options_lzma opt_lzma;
        if (lzma_lzma_preset(&opt_lzma, compressionLevel))
            EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
        ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
        if (ret != LZMA_OK)
            EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
    } else {
        ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
        if (ret != LZMA_OK)
            EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
    }

    writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (BYTE*)writeJob->buffer;
    strm.avail_out = writeJob->bufferSize;
    strm.next_in = 0;
    strm.avail_in = 0;

    while (1) {
        if (strm.avail_in == 0) {
            size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
            /* nothing left to read : ask the encoder to finish the stream */
            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
            inFileSize += inSize;
            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
            strm.avail_in = ress->readCtx->srcBufferLoaded;
        }

        {   /* tell the read pool how many bytes lzma_code() took */
            size_t const availBefore = strm.avail_in;
            ret = lzma_code(&strm, action);
            AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
        }


        if (ret != LZMA_OK && ret != LZMA_STREAM_END)
            EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
        {   size_t const compBytes = writeJob->bufferSize - strm.avail_out;
            if (compBytes) {
                writeJob->usedBufferSize = compBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += compBytes;
                strm.next_out = (BYTE*)writeJob->buffer;
                strm.avail_out = writeJob->bufferSize;
        }   }
        /* this point is also reached with an empty source (inFileSize == 0),
         * hence the `+ !inFileSize` in the ratio */
        if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
            DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
                            (unsigned)(inFileSize>>20),
                            (double)outFileSize/(double)(inFileSize + !inFileSize/*avoid div0*/)*100);
        } else {
            DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
                            (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
                            (double)outFileSize/(double)(inFileSize + !inFileSize/*avoid div0*/)*100);
        }
        if (ret == LZMA_STREAM_END) break;
    }

    lzma_end(&strm);
    *readsize = inFileSize;

    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);

    return outFileSize;
}
#endif
#ifdef ZSTD_LZ4COMPRESS

/* older lz4 releases expose these enum values without the LZ4F_ prefix */
#if LZ4_VERSION_NUMBER <= 10600
#define LZ4F_blockLinked blockLinked
#define LZ4F_max64KB max64KB
#endif

/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * @return : 1 << (8 + 2*id) */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const shift = 8 + (2 * id);
    return (1 << shift);
}

/* FIO_compressLz4Frame() :
 * Compresses the whole source held by ress->readCtx as one lz4 frame
 * (linked blocks, LZ4F_max64KB block size id) and sends it to ress->writeCtx.
 * `checksumFlag` is forwarded as the frame's content checksum flag.
 * `srcFileName` is only used for messages ; `srcFileSize`, when known, is also
 * stored as the frame content size (lz4 >= 1.6.0).
 * `*readsize` receives the number of source bytes read.
 * Any lz4 failure ends in EXM_THROW.
 * @return : number of compressed bytes produced.
 */
static unsigned long long
FIO_compressLz4Frame(cRess_t* ress,
                     const char* srcFileName, U64 const srcFileSize,
                     int compressionLevel, int checksumFlag,
                     U64* readsize)
{
    const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
    unsigned long long inFileSize = 0;
    unsigned long long outFileSize = 0;

    LZ4F_preferences_t prefs;
    LZ4F_compressionContext_t ctx;

    IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);

    LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
    if (LZ4F_isError(errorCode)) {
        EXM_THROW(31, "zstd: failed to create lz4 compression context");
    }

    memset(&prefs, 0, sizeof(prefs));

    assert(blockSize <= ress->readCtx->base.jobBufferSize);

    /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
    prefs.autoFlush = 0;
    prefs.compressionLevel = compressionLevel;
    prefs.frameInfo.blockMode = LZ4F_blockLinked;
    prefs.frameInfo.blockSizeID = LZ4F_max64KB;
    prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
    prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
#endif
    assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);

    /* Frame header */
    {   size_t const headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
        if (LZ4F_isError(headerSize)) {
            EXM_THROW(33, "File header generation failed : %s",
                            LZ4F_getErrorName(headerSize));
        }
        writeJob->usedBufferSize = headerSize;
        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
        outFileSize += headerSize;
    }

    /* Blocks : one read pool chunk (at most blockSize bytes) per iteration */
    inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
    while (ress->readCtx->srcBufferLoaded) {
        size_t const chunkSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
        size_t const producedSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
                                                        ress->readCtx->srcBuffer, chunkSize, NULL);
        if (LZ4F_isError(producedSize)) {
            EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
                        srcFileName, LZ4F_getErrorName(producedSize));
        }
        outFileSize += producedSize;
        if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
            DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
                            (unsigned)(inFileSize>>20),
                            (double)outFileSize/(double)inFileSize*100);
        } else {
            DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
                            (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
                            (double)outFileSize/(double)inFileSize*100);
        }

        /* hand the compressed bytes to the write pool */
        writeJob->usedBufferSize = producedSize;
        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);

        /* release the chunk, then load the next one */
        AIO_ReadPool_consumeBytes(ress->readCtx, chunkSize);
        inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
    }

    /* End of Stream mark */
    {   size_t const endSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
        if (LZ4F_isError(endSize)) {
            EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
                        srcFileName, LZ4F_getErrorName(endSize));
        }
        writeJob->usedBufferSize = endSize;
        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
        outFileSize += endSize;
    }

    *readsize = inFileSize;
    LZ4F_freeCompressionContext(ctx);
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);

    return outFileSize;
}
#endif
/* FIO_compressZstdFrame() :
 * Compresses the whole source held by the read pool of `*ressPtr` as one zstd
 * frame, and sends the result to its write pool.
 * `fileSize` is pledged to the compression context when known; otherwise
 * prefs->streamSrcSize is pledged when it is > 0.
 * In adaptive mode, the compression level is re-evaluated at most once per
 * REFRESH_RATE, from the frame progression reported by libzstd.
 * `*readsize` is incremented by the number of source bytes read
 * (it is not reset here, so it must be initialized by the caller).
 * Throws (EXM_THROW 27) if `fileSize` is known and fewer bytes were read.
 * @return : number of compressed bytes produced.
 */
static unsigned long long
FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
                      FIO_prefs_t* const prefs,
                      const cRess_t* ressPtr,
                      const char* srcFileName, U64 fileSize,
                      int compressionLevel, U64* readsize)
{
    cRess_t const ress = *ressPtr;
    IOJob_t* writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);

    U64 compressedfilesize = 0;
    ZSTD_EndDirective directive = ZSTD_e_continue;
    U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;

    /* stats */
    ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };       /* snapshot at last output-speed check */
    ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };   /* snapshot at last input-speed check */
    typedef enum { noChange, slower, faster } speedChange_e;
    speedChange_e speedChange = noChange;
    unsigned flushWaiting = 0;     /* set when a call started with nothing pending to flush */
    unsigned inputPresented = 0;   /* nb of compression calls since last correction */
    unsigned inputBlocked = 0;     /* nb of those calls that consumed no input */
    unsigned lastJobID = 0;
    UTIL_time_t lastAdaptTime = UTIL_getTime();
    U64 const adaptEveryMicro = REFRESH_RATE;

    UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);

    DISPLAYLEVEL(6, "compression using zstd format \n");

    /* init */
    if (fileSize != UTIL_FILESIZE_UNKNOWN) {
        pledgedSrcSize = fileSize;
        CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
    } else if (prefs->streamSrcSize > 0) {
        /* unknown source size; use the declared stream size */
        pledgedSrcSize = prefs->streamSrcSize;
        CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
    }

    /* report (level 4) the window size the decoder will need */
    {   int windowLog;
        UTIL_HumanReadableSize_t windowSize;
        CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
        if (windowLog == 0) {
            if (prefs->ldmFlag) {
                /* If long mode is set without a window size libzstd will set this size internally */
                windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
            } else {
                const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
                windowLog = (int)cParams.windowLog;
            }
        }
        windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
        DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
    }

    /* Main compression loop */
    do {
        size_t stillToFlush;
        /* Fill input Buffer */
        size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
        ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 );
        DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
        *readsize += inSize;

        /* end of input : nothing loaded, or the announced size has been read */
        if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
            directive = ZSTD_e_end;

        stillToFlush = 1;
        while ((inBuff.pos != inBuff.size)   /* input buffer must be entirely ingested */
            || (directive == ZSTD_e_end && stillToFlush != 0) ) {

            size_t const oldIPos = inBuff.pos;
            ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
            size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
            CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
            AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);

            /* count stats */
            inputPresented++;
            if (oldIPos == inBuff.pos) inputBlocked++;  /* input buffer is full and can't take any more : input speed is faster than consumption rate */
            if (!toFlushNow) flushWaiting = 1;

            /* Write compressed stream */
            DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
                         (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
            if (outBuff.pos) {
                writeJob->usedBufferSize = outBuff.pos;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                compressedfilesize += outBuff.pos;
            }

            /* adaptive mode : statistics measurement and speed correction */
            if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) {
                ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);

                lastAdaptTime = UTIL_getTime();

                /* check output speed */
                if (zfp.currentJobID > 1) {  /* only possible if nbWorkers >= 1 */

                    unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
                    unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
                    assert(zfp.produced >= previous_zfp_update.produced);
                    assert(prefs->nbWorkers >= 1);

                    /* test if compression is blocked
                     * either because output is slow and all buffers are full
                     * or because input is slow and no job can start while waiting for at least one buffer to be filled.
                     * note : exclude starting part, since currentJobID > 1 */
                    if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
                      && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
                      ) {
                        DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
                        speedChange = slower;
                    }

                    previous_zfp_update = zfp;

                    if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
                      && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
                      ) {
                        DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
                        speedChange = slower;
                    }
                    flushWaiting = 0;
                }

                /* course correct only if there is at least one new job completed */
                if (zfp.currentJobID > lastJobID) {
                    DISPLAYLEVEL(6, "compression level adaptation check \n")

                    /* check input speed */
                    if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) {   /* warm up period, to fill all workers */
                        if (inputBlocked <= 0) {
                            DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
                            speedChange = slower;
                        } else if (speedChange == noChange) {
                            unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
                            unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
                            unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
                            unsigned long long newlyFlushed  = zfp.flushed  - previous_zfp_correction.flushed;
                            previous_zfp_correction = zfp;
                            assert(inputPresented > 0);
                            DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
                                            inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
                                            (unsigned)newlyIngested, (unsigned)newlyConsumed,
                                            (unsigned)newlyFlushed, (unsigned)newlyProduced);
                            if ( (inputBlocked > inputPresented / 8)     /* input is waiting often, because input buffers is full : compression or output too slow */
                              && (newlyFlushed * 33 / 32 > newlyProduced)  /* flush everything that is produced */
                              && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
                            ) {
                                DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
                                                newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
                                speedChange = faster;
                            }
                        }
                        inputBlocked = 0;
                        inputPresented = 0;
                    }

                    /* "slower" raises the level, bounded by ZSTD_maxCLevel() and prefs->maxAdaptLevel */
                    if (speedChange == slower) {
                        DISPLAYLEVEL(6, "slower speed , higher compression \n")
                        compressionLevel ++;
                        if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
                        if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
                        compressionLevel += (compressionLevel == 0);   /* skip 0 */
                        ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
                    }
                    /* "faster" lowers the level, bounded by prefs->minAdaptLevel */
                    if (speedChange == faster) {
                        DISPLAYLEVEL(6, "faster speed , lighter compression \n")
                        compressionLevel --;
                        if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
                        compressionLevel -= (compressionLevel == 0);   /* skip 0 */
                        ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
                    }
                    speedChange = noChange;

                    lastJobID = zfp.currentJobID;
                }  /* if (zfp.currentJobID > lastJobID) */
            } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */

            /* display notification */
            if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) {
                ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
                double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
                UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
                UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
                UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);

                DELAY_NEXT_UPDATE();

                /* display progress notifications */
                DISPLAY_PROGRESS("\r%79s\r", "");    /* Clear out the current displayed line */
                if (g_display_prefs.displayLevel >= 3) {
                    /* Verbose progress update */
                    DISPLAY_PROGRESS(
                            "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ",
                            compressionLevel,
                            buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
                            consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
                            produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
                            cShare );
                } else {
                    /* Require level 2 or forcibly displayed progress counter for summarized updates */
                    if (fCtx->nbFilesTotal > 1) {
                        size_t srcFileNameSize = strlen(srcFileName);
                        /* Ensure that the string we print is roughly the same size each time */
                        if (srcFileNameSize > 18) {
                            /* long name : show only its last 15 characters, after "..." */
                            const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
                            DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ",
                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
                        } else {
                            DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ",
                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
                        }
                    }
                    DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
                    if (fileSize != UTIL_FILESIZE_UNKNOWN)
                        DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
                    DISPLAY_PROGRESS(" ==> %2.f%%", cShare);
                }
            }  /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */
        }  /* while ((inBuff.pos != inBuff.size) */
    } while (directive != ZSTD_e_end);

    /* a known source size must have been read entirely */
    if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
        EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
                (unsigned long long)*readsize, (unsigned long long)fileSize);
    }

    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);

    return compressedfilesize;
}
1729 * @return : 0 : compression completed correctly, 1730 * 1 : missing or pb opening srcFileName 1731 */ 1732 static int 1733 FIO_compressFilename_internal(FIO_ctx_t* const fCtx, 1734 FIO_prefs_t* const prefs, 1735 cRess_t ress, 1736 const char* dstFileName, const char* srcFileName, 1737 int compressionLevel) 1738 { 1739 UTIL_time_t const timeStart = UTIL_getTime(); 1740 clock_t const cpuStart = clock(); 1741 U64 readsize = 0; 1742 U64 compressedfilesize = 0; 1743 U64 const fileSize = UTIL_getFileSize(srcFileName); 1744 DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize); 1745 1746 /* compression format selection */ 1747 switch (prefs->compressionType) { 1748 default: 1749 case FIO_zstdCompression: 1750 compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); 1751 break; 1752 1753 case FIO_gzipCompression: 1754 #ifdef ZSTD_GZCOMPRESS 1755 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 1756 #else 1757 (void)compressionLevel; 1758 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", 1759 srcFileName); 1760 #endif 1761 break; 1762 1763 case FIO_xzCompression: 1764 case FIO_lzmaCompression: 1765 #ifdef ZSTD_LZMACOMPRESS 1766 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression); 1767 #else 1768 (void)compressionLevel; 1769 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", 1770 srcFileName); 1771 #endif 1772 break; 1773 1774 case FIO_lz4Compression: 1775 #ifdef ZSTD_LZ4COMPRESS 1776 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize); 1777 #else 1778 (void)compressionLevel; 1779 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 
(zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", 1780 srcFileName); 1781 #endif 1782 break; 1783 } 1784 1785 /* Status */ 1786 fCtx->totalBytesInput += (size_t)readsize; 1787 fCtx->totalBytesOutput += (size_t)compressedfilesize; 1788 DISPLAY_PROGRESS("\r%79s\r", ""); 1789 if (FIO_shouldDisplayFileSummary(fCtx)) { 1790 UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); 1791 UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); 1792 if (readsize == 0) { 1793 DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n", 1794 srcFileName, 1795 hr_isize.precision, hr_isize.value, hr_isize.suffix, 1796 hr_osize.precision, hr_osize.value, hr_osize.suffix, 1797 dstFileName); 1798 } else { 1799 DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", 1800 srcFileName, 1801 (double)compressedfilesize / (double)readsize * 100, 1802 hr_isize.precision, hr_isize.value, hr_isize.suffix, 1803 hr_osize.precision, hr_osize.value, hr_osize.suffix, 1804 dstFileName); 1805 } 1806 } 1807 1808 /* Elapsed Time and CPU Load */ 1809 { clock_t const cpuEnd = clock(); 1810 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; 1811 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); 1812 double const timeLength_s = (double)timeLength_ns / 1000000000; 1813 double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100; 1814 DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n", 1815 srcFileName, timeLength_s, cpuLoad_pct); 1816 } 1817 return 0; 1818 } 1819 1820 1821 /*! FIO_compressFilename_dstFile() : 1822 * open dstFileName, or pass-through if ress.file != NULL, 1823 * then start compression with FIO_compressFilename_internal(). 1824 * Manages source removal (--rm) and file permissions transfer. 1825 * note : ress.srcFile must be != NULL, 1826 * so reach this function through FIO_compressFilename_srcFile(). 
1827 * @return : 0 : compression completed correctly, 1828 * 1 : pb 1829 */ 1830 static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, 1831 FIO_prefs_t* const prefs, 1832 cRess_t ress, 1833 const char* dstFileName, 1834 const char* srcFileName, 1835 const stat_t* srcFileStat, 1836 int compressionLevel) 1837 { 1838 int closeDstFile = 0; 1839 int result; 1840 int transferStat = 0; 1841 int dstFd = -1; 1842 1843 assert(AIO_ReadPool_getFile(ress.readCtx) != NULL); 1844 if (AIO_WritePool_getFile(ress.writeCtx) == NULL) { 1845 int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS; 1846 if ( strcmp (srcFileName, stdinmark) 1847 && strcmp (dstFileName, stdoutmark) 1848 && UTIL_isRegularFileStat(srcFileStat) ) { 1849 transferStat = 1; 1850 dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS; 1851 } 1852 1853 closeDstFile = 1; 1854 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); 1855 { FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions); 1856 if (dstFile==NULL) return 1; /* could not open dstFileName */ 1857 dstFd = fileno(dstFile); 1858 AIO_WritePool_setFile(ress.writeCtx, dstFile); 1859 } 1860 /* Must only be added after FIO_openDstFile() succeeds. 1861 * Otherwise we may delete the destination file if it already exists, 1862 * and the user presses Ctrl-C when asked if they wish to overwrite. 
/* List used to compare file extensions (used with --exclude-compressed flag).
 * Each entry is a file name suffix, leading dot included; the list ends with NULL.
 * The first entries are the extension macros of the formats handled by this
 * program, the string literals that follow are kept in alphabetical order.
 * This list is distinct from the suffixList and is only meant to apply to
 * zstd compression operations.
 */
static const char *compressedFileExtensions[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
    GZ_EXTENSION,
    TGZ_EXTENSION,
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
    ".7z",
    ".aa3",
    ".aac",
    ".aar",
    ".ace",
    ".alac",
    ".ape",
    ".apk",
    ".apng",
    ".arc",
    ".archive",
    ".arj",
    ".ark",
    ".asf",
    ".avi",
    ".avif",
    ".ba",
    ".br",
    ".bz2",
    ".cab",
    ".cdx",
    ".chm",
    ".cr2",
    ".divx",
    ".dmg",
    ".dng",
    ".docm",
    ".docx",
    ".dotm",
    ".dotx",
    ".dsft",
    ".ear",
    ".eftx",
    ".emz",
    ".eot",
    ".epub",
    ".f4v",
    ".flac",
    ".flv",
    ".gho",
    ".gif",
    ".gifv",
    ".gnp",
    ".iso",
    ".jar",
    ".jpeg",
    ".jpg",
    ".jxl",
    ".lz",
    ".lzh",
    ".m4a",
    ".m4v",
    ".mkv",
    ".mov",
    ".mp2",
    ".mp3",
    ".mp4",
    ".mpa",
    ".mpc",
    ".mpe",
    ".mpeg",
    ".mpg",
    ".mpl",
    ".mpv",
    ".msi",
    ".odp",
    ".ods",
    ".odt",
    ".ogg",
    ".ogv",
    ".otp",
    ".ots",
    ".ott",
    ".pea",
    ".png",
    ".pptx",
    ".qt",
    ".rar",
    ".s7z",
    ".sfx",
    ".sit",
    ".sitx",
    ".sqx",
    ".svgz",
    ".swf",
    ".tbz2",
    ".tib",
    ".tlz",
    ".vob",
    ".war",
    ".webm",
    ".webp",
    ".wma",
    ".wmv",
    ".woff",
    ".woff2",
    ".wvl",
    ".xlsx",
    ".xpi",
    ".xps",
    ".zip",
    ".zipx",
    ".zoo",
    ".zpaq",
    NULL   /* end-of-list marker */
};
2051 2052 /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used 2053 * YES => ZSTD will skip compression of the file and will return 0. 2054 * NO => ZSTD will resume with compress operation. 2055 */ 2056 if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { 2057 DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); 2058 return 0; 2059 } 2060 2061 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); 2062 if (srcFile == NULL) return 1; /* srcFile could not be opened */ 2063 2064 /* Don't use AsyncIO for small files */ 2065 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ 2066 fileSize = UTIL_getFileSizeStat(&srcFileStat); 2067 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { 2068 AIO_ReadPool_setAsync(ress.readCtx, 0); 2069 AIO_WritePool_setAsync(ress.writeCtx, 0); 2070 } else { 2071 AIO_ReadPool_setAsync(ress.readCtx, 1); 2072 AIO_WritePool_setAsync(ress.writeCtx, 1); 2073 } 2074 2075 AIO_ReadPool_setFile(ress.readCtx, srcFile); 2076 result = FIO_compressFilename_dstFile( 2077 fCtx, prefs, ress, 2078 dstFileName, srcFileName, 2079 &srcFileStat, compressionLevel); 2080 AIO_ReadPool_closeFile(ress.readCtx); 2081 2082 if ( prefs->removeSrcFile /* --rm */ 2083 && result == 0 /* success */ 2084 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ 2085 ) { 2086 /* We must clear the handler, since after this point calling it would 2087 * delete both the source and destination files. 
2088 */ 2089 clearHandler(); 2090 if (FIO_removeFile(srcFileName)) 2091 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); 2092 } 2093 return result; 2094 } 2095 2096 static const char* 2097 checked_index(const char* options[], size_t length, size_t index) { 2098 assert(index < length); 2099 /* Necessary to avoid warnings since -O3 will omit the above `assert` */ 2100 (void) length; 2101 return options[index]; 2102 } 2103 2104 #define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index)) 2105 2106 void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) 2107 { 2108 static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION, 2109 LZMA_EXTENSION, LZ4_EXTENSION}; 2110 static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"}; 2111 static const char* checkSumOptions[3] = {" --no-check", "", " --check"}; 2112 static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"}; 2113 static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"}; 2114 2115 assert(g_display_prefs.displayLevel >= 4); 2116 2117 DISPLAY("--format=%s", formatOptions[prefs->compressionType]); 2118 DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport)); 2119 DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID"); 2120 DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag)); 2121 DISPLAY(" --block-size=%d", prefs->blockSize); 2122 if (prefs->adaptiveMode) 2123 DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel); 2124 DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder)); 2125 DISPLAY("%s", prefs->rsyncable ? 
" --rsyncable" : ""); 2126 if (prefs->streamSrcSize) 2127 DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize); 2128 if (prefs->srcSizeHint) 2129 DISPLAY(" --size-hint=%d", prefs->srcSizeHint); 2130 if (prefs->targetCBlockSize) 2131 DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize); 2132 DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode)); 2133 DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB); 2134 DISPLAY(" --threads=%d", prefs->nbWorkers); 2135 DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : ""); 2136 DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-"); 2137 DISPLAY("\n"); 2138 } 2139 2140 #undef INDEX 2141 2142 int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, 2143 const char* srcFileName, const char* dictFileName, 2144 int compressionLevel, ZSTD_compressionParameters comprParams) 2145 { 2146 cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); 2147 int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); 2148 2149 #define DISPLAY_LEVEL_DEFAULT 2 2150 2151 FIO_freeCResources(&ress); 2152 return result; 2153 } 2154 2155 /* FIO_determineCompressedName() : 2156 * create a destination filename for compressed srcFileName. 2157 * @return a pointer to it. 
2158 * This function never returns an error (it may abort() in case of pb) 2159 */ 2160 static const char* 2161 FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix) 2162 { 2163 static size_t dfnbCapacity = 0; 2164 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ 2165 char* outDirFilename = NULL; 2166 size_t sfnSize = strlen(srcFileName); 2167 size_t const srcSuffixLen = strlen(suffix); 2168 2169 if(!strcmp(srcFileName, stdinmark)) { 2170 return stdoutmark; 2171 } 2172 2173 if (outDirName) { 2174 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); 2175 sfnSize = strlen(outDirFilename); 2176 assert(outDirFilename != NULL); 2177 } 2178 2179 if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { 2180 /* resize buffer for dstName */ 2181 free(dstFileNameBuffer); 2182 dfnbCapacity = sfnSize + srcSuffixLen + 30; 2183 dstFileNameBuffer = (char*)malloc(dfnbCapacity); 2184 if (!dstFileNameBuffer) { 2185 EXM_THROW(30, "zstd: %s", strerror(errno)); 2186 } 2187 } 2188 assert(dstFileNameBuffer != NULL); 2189 2190 if (outDirFilename) { 2191 memcpy(dstFileNameBuffer, outDirFilename, sfnSize); 2192 free(outDirFilename); 2193 } else { 2194 memcpy(dstFileNameBuffer, srcFileName, sfnSize); 2195 } 2196 memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); 2197 return dstFileNameBuffer; 2198 } 2199 2200 static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles) 2201 { 2202 size_t i; 2203 unsigned long long fileSize, maxFileSize = 0; 2204 for (i = 0; i < nbFiles; i++) { 2205 fileSize = UTIL_getFileSize(inFileNames[i]); 2206 maxFileSize = fileSize > maxFileSize ? 
/* FIO_compressMultipleFilenames() :
 * compress nbFiles files
 * into either one destination (outFileName),
 * or into one file each (outFileName == NULL, but suffix != NULL),
 * or into a destination folder (specified with -O)
 *
 * The files to process are inFileNamesTable[fCtx->currFileIdx .. fCtx->nbFilesTotal-1] :
 * the iteration index lives in fCtx and is advanced by this function.
 * One set of compression resources is created up front, sized with the largest
 * input file, and shared by all files.
 * @return : 0 when every file was compressed successfully,
 *           non-zero as soon as one of them failed
 *           (1 directly when FIO_multiFilesConcatWarning() refuses the operation).
 */
int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
                                  FIO_prefs_t* const prefs,
                                  const char** inFileNamesTable,
                                  const char* outMirroredRootDirName,
                                  const char* outDirName,
                                  const char* outFileName, const char* suffix,
                                  const char* dictFileName, int compressionLevel,
                                  ZSTD_compressionParameters comprParams)
{
    int status;
    int error = 0;   /* accumulates (bitwise OR) the status of every file */
    cRess_t ress = FIO_createCResources(prefs, dictFileName,
        FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
        compressionLevel, comprParams);

    /* init */
    assert(outFileName != NULL || suffix != NULL);
    if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
        FILE *dstFile;
        if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
            FIO_freeCResources(&ress);
            return 1;
        }
        dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
        if (dstFile == NULL) {  /* could not open outFileName */
            error = 1;
        } else {
            /* the destination is attached once; every source is then appended to it */
            AIO_WritePool_setFile(ress.writeCtx, dstFile);
            for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
                status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
                if (!status) fCtx->nbFilesProcessed++;
                error |= status;
            }
            if (AIO_WritePool_closeFile(ress.writeCtx))
                EXM_THROW(29, "Write error (%s) : cannot properly close %s",
                            strerror(errno), outFileName);
        }
    } else {
        /* one destination per source */
        if (outMirroredRootDirName)
            UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);

        for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
            const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
            const char* dstFileName = NULL;
            if (outMirroredRootDirName) {
                char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
                if (validMirroredDirName) {
                    dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
                    free(validMirroredDirName);
                } else {
                    /* no mirrored directory for this source : flag the error and move on to the next file */
                    DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
                    error=1;
                    continue;
                }
            } else {
                dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix);  /* cannot fail */
            }
            /* dstFileName is not owned here : it is neither freed nor kept after this iteration */
            status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
            if (!status) fCtx->nbFilesProcessed++;
            error |= status;
        }

        /* note : this check runs after the files have been compressed */
        if (outDirName)
            FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
    }

    if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
        UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
        UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);

        DISPLAY_PROGRESS("\r%79s\r", "");   /* wipe the progress line */
        if (fCtx->totalBytesInput == 0) {
            /* nothing was read : no ratio can be computed, so none is printed */
            DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n",
                            fCtx->nbFilesProcessed,
                            hr_isize.precision, hr_isize.value, hr_isize.suffix,
                            hr_osize.precision, hr_osize.value, hr_osize.suffix);
        } else {
            DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n",
                            fCtx->nbFilesProcessed,
                            (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
                            hr_isize.precision, hr_isize.value, hr_isize.suffix,
                            hr_osize.precision, hr_osize.value, hr_osize.suffix);
        }
    }

    FIO_freeCResources(&ress);
    return error;
}
2299 } 2300 } 2301 2302 FIO_freeCResources(&ress); 2303 return error; 2304 } 2305 2306 #endif /* #ifndef ZSTD_NOCOMPRESS */ 2307 2308 2309 2310 #ifndef ZSTD_NODECOMPRESS 2311 2312 /* ************************************************************************** 2313 * Decompression 2314 ***************************************************************************/ 2315 typedef struct { 2316 FIO_Dict_t dict; 2317 ZSTD_DStream* dctx; 2318 WritePoolCtx_t *writeCtx; 2319 ReadPoolCtx_t *readCtx; 2320 } dRess_t; 2321 2322 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) 2323 { 2324 int useMMap = prefs->mmapDict == ZSTD_ps_enable; 2325 int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable; 2326 stat_t statbuf; 2327 dRess_t ress; 2328 memset(&statbuf, 0, sizeof(statbuf)); 2329 memset(&ress, 0, sizeof(ress)); 2330 2331 FIO_getDictFileStat(dictFileName, &statbuf); 2332 2333 if (prefs->patchFromMode){ 2334 U64 const dictSize = UTIL_getFileSizeStat(&statbuf); 2335 useMMap |= dictSize > prefs->memLimit; 2336 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */); 2337 } 2338 2339 /* Allocation */ 2340 ress.dctx = ZSTD_createDStream(); 2341 if (ress.dctx==NULL) 2342 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); 2343 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); 2344 CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); 2345 2346 /* dictionary */ 2347 { 2348 FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? 
FIO_mmapDict : FIO_mallocDict; 2349 FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType); 2350 2351 CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) ); 2352 2353 if (prefs->patchFromMode){ 2354 CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize)); 2355 } else { 2356 CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize)); 2357 } 2358 } 2359 2360 ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize()); 2361 ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize()); 2362 return ress; 2363 } 2364 2365 static void FIO_freeDResources(dRess_t ress) 2366 { 2367 FIO_freeDict(&(ress.dict)); 2368 CHECK( ZSTD_freeDStream(ress.dctx) ); 2369 AIO_WritePool_free(ress.writeCtx); 2370 AIO_ReadPool_free(ress.readCtx); 2371 } 2372 2373 /* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode 2374 * @return : 0 (no error) */ 2375 static int FIO_passThrough(dRess_t *ress) 2376 { 2377 size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize()); 2378 IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); 2379 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); 2380 2381 while(ress->readCtx->srcBufferLoaded) { 2382 size_t writeSize; 2383 writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); 2384 assert(writeSize <= writeJob->bufferSize); 2385 memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize); 2386 writeJob->usedBufferSize = writeSize; 2387 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); 2388 AIO_ReadPool_consumeBytes(ress->readCtx, writeSize); 2389 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); 2390 } 2391 assert(ress->readCtx->reachedEof); 2392 AIO_WritePool_releaseIoJob(writeJob); 2393 AIO_WritePool_sparseWriteEnd(ress->writeCtx); 2394 return 0; 2395 } 2396 2397 /* FIO_zstdErrorHelp() : 2398 * detailed error message when requested window size is too large */ 2399 static 
/* FIO_zstdErrorHelp() :
 * detailed error message when requested window size is too large.
 * Does nothing for any other error code.
 * `err` is the error returned by the decoder; the frame header is re-read
 * from the data currently loaded in ress->readCtx. */
static void
FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
                  const dRess_t* ress,
                  size_t err,
                  const char* srcFileName)
{
    ZSTD_FrameHeader header;

    /* Help message only for one specific error */
    if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
        return;

    /* Try to decode the frame header */
    err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
    if (err == 0) {
        unsigned long long const windowSize = header.windowSize;
        /* +1 when windowSize is not a power of 2 (some bit below the highest one is set) */
        unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
        assert(prefs->memLimit > 0);
        DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
                        srcFileName, windowSize, prefs->memLimit);
        if (windowLog <= ZSTD_WINDOWLOG_MAX) {
            /* size in MB, rounded up */
            unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
            assert(windowSize < (U64)(1ULL << 52));   /* ensure no overflow for windowMB */
            DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
                            srcFileName, windowLog, windowMB);
            return;
    }   }
    /* reached when the header could not be decoded, or when windowLog exceeds the maximum */
    DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
                    srcFileName, ZSTD_WINDOWLOG_MAX);
}

/** FIO_decompressZstdFrame() :
 *  decode one zstd frame from ress->readCtx into ress->writeCtx.
 *  `alreadyDecoded` is only used for the progress display (multi-frames streams).
 *  @return : size of decoded zstd frame, or an error code (FIO_ERROR_FRAME_DECODING)
 */
#define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
static unsigned long long
FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
                        const FIO_prefs_t* const prefs,
                        const char* srcFileName,
                        U64 alreadyDecoded)  /* for multi-frames streams */
{
    U64 frameSize = 0;
    const char* srcFName20 = srcFileName;
    IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    assert(writeJob);

    /* display last 20 characters only when not --verbose */
    {   size_t const srcFileLength = strlen(srcFileName);
        if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
            srcFName20 += srcFileLength-20;
    }

    /* start a new frame; the return value of the reset is not checked */
    ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);

    /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
    AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);

    /* Main decompression Loop */
    while (1) {
        ZSTD_inBuffer  inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 );
        ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
        size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
        /* computed before frameSize is updated below :
         * the progress line shows the amount decoded prior to this round */
        UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
        if (ZSTD_isError(readSizeHint)) {
            DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
                            srcFileName, ZSTD_getErrorName(readSizeHint));
            FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
            AIO_WritePool_releaseIoJob(writeJob);
            return FIO_ERROR_FRAME_DECODING;
        }

        /* Write block */
        writeJob->usedBufferSize = outBuff.pos;
        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
        frameSize += outBuff.pos;
        if (fCtx->nbFilesTotal > 1) {
            DISPLAYUPDATE_PROGRESS(
                        "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFName20, hrs.precision, hrs.value, hrs.suffix);
        } else {
            DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ",
                            srcFName20, hrs.precision, hrs.value, hrs.suffix);
        }

        /* drop the input bytes the decoder has used */
        AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);

        if (readSizeHint == 0) break;   /* end of frame */

        /* Fill input buffer */
        {   size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize());  /* support large skippable frames */
            if (ress->readCtx->srcBufferLoaded < toDecode) {
                size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
                if (readSize==0) {
                    /* the decoder expects more data, but nothing more could be read */
                    DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
                                 srcFileName);
                    AIO_WritePool_releaseIoJob(writeJob);
                    return FIO_ERROR_FRAME_DECODING;
                }
    }   }   }

    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);

    return frameSize;
}


#ifdef ZSTD_GZDECOMPRESS
/* FIO_decompressGzFrame() :
 * decode one gzip stream with zlib, starting with the data already loaded in
 * ress->readCtx, and send the output to ress->writeCtx.
 * @return : nb of bytes produced, or FIO_ERROR_FRAME_DECODING */
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
{
    unsigned long long outFileSize = 0;
    z_stream strm;
    int flush = Z_NO_FLUSH;
    int decodingError = 0;
    IOJob_t *writeJob = NULL;

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.next_in = 0;
    strm.avail_in = 0;
    /* see https://www.zlib.net/manual.html */
    if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
        return FIO_ERROR_FRAME_DECODING;

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (Bytef*)writeJob->buffer;
    strm.avail_out = (uInt)writeJob->bufferSize;
    strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
    strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;

    for ( ; ; ) {
        int ret;
        if (strm.avail_in == 0) {
            /* all loaded input was used : get more, and switch to Z_FINISH once the source is dry */
            AIO_ReadPool_consumeAndRefill(ress->readCtx);
            if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
            strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
            strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
        }
        ret = inflate(&strm, flush);
        if (ret == Z_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != Z_OK && ret != Z_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
            decodingError = 1; break;
        }
        /* flush whatever inflate() produced, then start again with an empty output buffer */
        {   size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
            if (decompBytes) {
                writeJob->usedBufferSize = decompBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += decompBytes;
                strm.next_out = (Bytef*)writeJob->buffer;
                strm.avail_out = (uInt)writeJob->bufferSize;
            }
        }
        if (ret == Z_STREAM_END) break;
    }

    /* only consume what zlib used : the remaining input stays loaded in the read pool */
    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);

    if ( (inflateEnd(&strm) != Z_OK)  /* release resources ; error detected */
      && (decodingError==0) ) {
        DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
        decodingError = 1;
    }
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
/* FIO_decompressLzmaFrame() :
 * decode one xz stream (or one plain lzma stream when plain_lzma != 0) with
 * liblzma, starting with the data already loaded in ress->readCtx, and send
 * the output to ress->writeCtx.
 * @return : nb of bytes produced, or FIO_ERROR_FRAME_DECODING */
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress,
                        const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret initRet;
    int decodingError = 0;
    IOJob_t *writeJob = NULL;

    strm.next_in = 0;
    strm.avail_in = 0;
    if (plain_lzma) {
        initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (initRet != LZMA_OK) {
        /* file name first, then the failing function, like every other
         * "zstd: <file>: ..." message of this function */
        DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
                        srcFileName,
                        plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
                        (int)initRet);
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (BYTE*)writeJob->buffer;
    strm.avail_out = writeJob->bufferSize;
    strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
    strm.avail_in = ress->readCtx->srcBufferLoaded;

    for ( ; ; ) {
        lzma_ret ret;
        if (strm.avail_in == 0) {
            /* all loaded input was used : get more, and switch to LZMA_FINISH once the source is dry */
            AIO_ReadPool_consumeAndRefill(ress->readCtx);
            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
            strm.avail_in = ress->readCtx->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
                            srcFileName, (int)ret);
            decodingError = 1; break;
        }
        /* flush whatever lzma_code() produced, then start again with an empty output buffer */
        {   size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
            if (decompBytes) {
                writeJob->usedBufferSize = decompBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += decompBytes;
                strm.next_out = (BYTE*)writeJob->buffer;
                strm.avail_out = writeJob->bufferSize;
        }   }
        if (ret == LZMA_STREAM_END) break;
    }

    /* only consume what liblzma used : the remaining input stays loaded in the read pool */
    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
    lzma_end(&strm);
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
FIO_ERROR_FRAME_DECODING : outFileSize; 2641 } 2642 #endif 2643 2644 #ifdef ZSTD_LZ4DECOMPRESS 2645 static unsigned long long 2646 FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) 2647 { 2648 unsigned long long filesize = 0; 2649 LZ4F_errorCode_t nextToLoad = 4; 2650 LZ4F_decompressionContext_t dCtx; 2651 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); 2652 int decodingError = 0; 2653 IOJob_t *writeJob = NULL; 2654 2655 if (LZ4F_isError(errorCode)) { 2656 DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); 2657 return FIO_ERROR_FRAME_DECODING; 2658 } 2659 2660 writeJob = AIO_WritePool_acquireJob(ress->writeCtx); 2661 2662 /* Main Loop */ 2663 for (;nextToLoad;) { 2664 size_t pos = 0; 2665 size_t decodedBytes = writeJob->bufferSize; 2666 int fullBufferDecoded = 0; 2667 2668 /* Read input */ 2669 AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad); 2670 if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */ 2671 2672 while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */ 2673 /* Decode Input (at least partially) */ 2674 size_t remaining = ress->readCtx->srcBufferLoaded - pos; 2675 decodedBytes = writeJob->bufferSize; 2676 nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos, 2677 &remaining, NULL); 2678 if (LZ4F_isError(nextToLoad)) { 2679 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", 2680 srcFileName, LZ4F_getErrorName(nextToLoad)); 2681 decodingError = 1; nextToLoad = 0; break; 2682 } 2683 pos += remaining; 2684 assert(pos <= ress->readCtx->srcBufferLoaded); 2685 fullBufferDecoded = decodedBytes == writeJob->bufferSize; 2686 2687 /* Write Block */ 2688 if (decodedBytes) { 2689 UTIL_HumanReadableSize_t hrs; 2690 writeJob->usedBufferSize = decodedBytes; 2691 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); 2692 filesize += decodedBytes; 2693 hrs = 
UTIL_makeHumanReadableSize(filesize); 2694 DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); 2695 } 2696 2697 if (!nextToLoad) break; 2698 } 2699 AIO_ReadPool_consumeBytes(ress->readCtx, pos); 2700 } 2701 if (nextToLoad!=0) { 2702 DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); 2703 decodingError=1; 2704 } 2705 2706 LZ4F_freeDecompressionContext(dCtx); 2707 AIO_WritePool_releaseIoJob(writeJob); 2708 AIO_WritePool_sparseWriteEnd(ress->writeCtx); 2709 2710 return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; 2711 } 2712 #endif 2713 2714 2715 2716 /** FIO_decompressFrames() : 2717 * Find and decode frames inside srcFile 2718 * srcFile presumed opened and valid 2719 * @return : 0 : OK 2720 * 1 : error 2721 */ 2722 static int FIO_decompressFrames(FIO_ctx_t* const fCtx, 2723 dRess_t ress, const FIO_prefs_t* const prefs, 2724 const char* dstFileName, const char* srcFileName) 2725 { 2726 unsigned readSomething = 0; 2727 unsigned long long filesize = 0; 2728 int passThrough = prefs->passThrough; 2729 2730 if (passThrough == -1) { 2731 /* If pass-through mode is not explicitly enabled or disabled, 2732 * default to the legacy behavior of enabling it if we are writing 2733 * to stdout with the overwrite flag enabled. 
2734 */ 2735 passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark); 2736 } 2737 assert(passThrough == 0 || passThrough == 1); 2738 2739 /* for each frame */ 2740 for ( ; ; ) { 2741 /* check magic number -> version */ 2742 size_t const toRead = 4; 2743 const BYTE* buf; 2744 AIO_ReadPool_fillBuffer(ress.readCtx, toRead); 2745 buf = (const BYTE*)ress.readCtx->srcBuffer; 2746 if (ress.readCtx->srcBufferLoaded==0) { 2747 if (readSomething==0) { /* srcFile is empty (which is invalid) */ 2748 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); 2749 return 1; 2750 } /* else, just reached frame boundary */ 2751 break; /* no more input */ 2752 } 2753 readSomething = 1; /* there is at least 1 byte in srcFile */ 2754 if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */ 2755 if (passThrough) { 2756 return FIO_passThrough(&ress); 2757 } 2758 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); 2759 return 1; 2760 } 2761 if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) { 2762 unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize); 2763 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2764 filesize += frameSize; 2765 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ 2766 #ifdef ZSTD_GZDECOMPRESS 2767 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName); 2768 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2769 filesize += frameSize; 2770 #else 2771 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); 2772 return 1; 2773 #endif 2774 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ 2775 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ 2776 #ifdef ZSTD_LZMADECOMPRESS 2777 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD); 2778 if (frameSize == 
FIO_ERROR_FRAME_DECODING) return 1; 2779 filesize += frameSize; 2780 #else 2781 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); 2782 return 1; 2783 #endif 2784 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { 2785 #ifdef ZSTD_LZ4DECOMPRESS 2786 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName); 2787 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 2788 filesize += frameSize; 2789 #else 2790 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); 2791 return 1; 2792 #endif 2793 } else if (passThrough) { 2794 return FIO_passThrough(&ress); 2795 } else { 2796 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); 2797 return 1; 2798 } } /* for each frame */ 2799 2800 /* Final Status */ 2801 fCtx->totalBytesOutput += (size_t)filesize; 2802 DISPLAY_PROGRESS("\r%79s\r", ""); 2803 if (FIO_shouldDisplayFileSummary(fCtx)) 2804 DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize); 2805 2806 return 0; 2807 } 2808 2809 /** FIO_decompressDstFile() : 2810 open `dstFileName`, or pass-through if writeCtx's file is already != 0, 2811 then start decompression process (FIO_decompressFrames()). 
2812 @return : 0 : OK 2813 1 : operation aborted 2814 */ 2815 static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, 2816 FIO_prefs_t* const prefs, 2817 dRess_t ress, 2818 const char* dstFileName, 2819 const char* srcFileName, 2820 const stat_t* srcFileStat) 2821 { 2822 int result; 2823 int releaseDstFile = 0; 2824 int transferStat = 0; 2825 int dstFd = 0; 2826 2827 if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) { 2828 FILE *dstFile; 2829 int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; 2830 if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ 2831 && strcmp(dstFileName, stdoutmark) 2832 && UTIL_isRegularFileStat(srcFileStat) ) { 2833 transferStat = 1; 2834 dstFilePermissions = TEMPORARY_FILE_PERMISSIONS; 2835 } 2836 2837 releaseDstFile = 1; 2838 2839 dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); 2840 if (dstFile==NULL) return 1; 2841 dstFd = fileno(dstFile); 2842 AIO_WritePool_setFile(ress.writeCtx, dstFile); 2843 2844 /* Must only be added after FIO_openDstFile() succeeds. 2845 * Otherwise we may delete the destination file if it already exists, 2846 * and the user presses Ctrl-C when asked if they wish to overwrite. 
2847 */ 2848 addHandler(dstFileName); 2849 } 2850 2851 result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName); 2852 2853 if (releaseDstFile) { 2854 clearHandler(); 2855 2856 if (transferStat) { 2857 UTIL_setFDStat(dstFd, dstFileName, srcFileStat); 2858 } 2859 2860 if (AIO_WritePool_closeFile(ress.writeCtx)) { 2861 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 2862 result = 1; 2863 } 2864 2865 if (transferStat) { 2866 UTIL_utime(dstFileName, srcFileStat); 2867 } 2868 2869 if ( (result != 0) /* operation failure */ 2870 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ 2871 ) { 2872 FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ 2873 } 2874 } 2875 2876 return result; 2877 } 2878 2879 2880 /** FIO_decompressSrcFile() : 2881 Open `srcFileName`, transfer control to decompressDstFile() 2882 @return : 0 : OK 2883 1 : error 2884 */ 2885 static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) 2886 { 2887 FILE* srcFile; 2888 stat_t srcFileStat; 2889 int result; 2890 U64 fileSize = UTIL_FILESIZE_UNKNOWN; 2891 2892 if (UTIL_isDirectory(srcFileName)) { 2893 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 2894 return 1; 2895 } 2896 2897 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); 2898 if (srcFile==NULL) return 1; 2899 2900 /* Don't use AsyncIO for small files */ 2901 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ 2902 fileSize = UTIL_getFileSizeStat(&srcFileStat); 2903 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { 2904 AIO_ReadPool_setAsync(ress.readCtx, 0); 2905 AIO_WritePool_setAsync(ress.writeCtx, 0); 2906 } else { 2907 AIO_ReadPool_setAsync(ress.readCtx, 1); 2908 AIO_WritePool_setAsync(ress.writeCtx, 1); 2909 } 2910 2911 AIO_ReadPool_setFile(ress.readCtx, 
srcFile); 2912 2913 result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat); 2914 2915 AIO_ReadPool_setFile(ress.readCtx, NULL); 2916 2917 /* Close file */ 2918 if (fclose(srcFile)) { 2919 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ 2920 return 1; 2921 } 2922 if ( prefs->removeSrcFile /* --rm */ 2923 && (result==0) /* decompression successful */ 2924 && strcmp(srcFileName, stdinmark) ) /* not stdin */ { 2925 /* We must clear the handler, since after this point calling it would 2926 * delete both the source and destination files. 2927 */ 2928 clearHandler(); 2929 if (FIO_removeFile(srcFileName)) { 2930 /* failed to remove src file */ 2931 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 2932 return 1; 2933 } } 2934 return result; 2935 } 2936 2937 2938 2939 int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, 2940 const char* dstFileName, const char* srcFileName, 2941 const char* dictFileName) 2942 { 2943 dRess_t const ress = FIO_createDResources(prefs, dictFileName); 2944 2945 int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); 2946 2947 2948 2949 FIO_freeDResources(ress); 2950 return decodingError; 2951 } 2952 2953 static const char *suffixList[] = { 2954 ZSTD_EXTENSION, 2955 TZSTD_EXTENSION, 2956 #ifndef ZSTD_NODECOMPRESS 2957 ZSTD_ALT_EXTENSION, 2958 #endif 2959 #ifdef ZSTD_GZDECOMPRESS 2960 GZ_EXTENSION, 2961 TGZ_EXTENSION, 2962 #endif 2963 #ifdef ZSTD_LZMADECOMPRESS 2964 LZMA_EXTENSION, 2965 XZ_EXTENSION, 2966 TXZ_EXTENSION, 2967 #endif 2968 #ifdef ZSTD_LZ4DECOMPRESS 2969 LZ4_EXTENSION, 2970 TLZ4_EXTENSION, 2971 #endif 2972 NULL 2973 }; 2974 2975 static const char *suffixListStr = 2976 ZSTD_EXTENSION "/" TZSTD_EXTENSION 2977 #ifdef ZSTD_GZDECOMPRESS 2978 "/" GZ_EXTENSION "/" TGZ_EXTENSION 2979 #endif 2980 #ifdef ZSTD_LZMADECOMPRESS 2981 "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION 2982 
#endif 2983 #ifdef ZSTD_LZ4DECOMPRESS 2984 "/" LZ4_EXTENSION "/" TLZ4_EXTENSION 2985 #endif 2986 ; 2987 2988 /* FIO_determineDstName() : 2989 * create a destination filename from a srcFileName. 2990 * @return a pointer to it. 2991 * @return == NULL if there is an error */ 2992 static const char* 2993 FIO_determineDstName(const char* srcFileName, const char* outDirName) 2994 { 2995 static size_t dfnbCapacity = 0; 2996 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ 2997 size_t dstFileNameEndPos; 2998 char* outDirFilename = NULL; 2999 const char* dstSuffix = ""; 3000 size_t dstSuffixLen = 0; 3001 3002 size_t sfnSize = strlen(srcFileName); 3003 3004 size_t srcSuffixLen; 3005 const char* const srcSuffix = strrchr(srcFileName, '.'); 3006 3007 if(!strcmp(srcFileName, stdinmark)) { 3008 return stdoutmark; 3009 } 3010 3011 if (srcSuffix == NULL) { 3012 DISPLAYLEVEL(1, 3013 "zstd: %s: unknown suffix (%s expected). " 3014 "Can't derive the output file name. " 3015 "Specify it with -o dstFileName. Ignoring.\n", 3016 srcFileName, suffixListStr); 3017 return NULL; 3018 } 3019 srcSuffixLen = strlen(srcSuffix); 3020 3021 { 3022 const char** matchedSuffixPtr; 3023 for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { 3024 if (!strcmp(*matchedSuffixPtr, srcSuffix)) { 3025 break; 3026 } 3027 } 3028 3029 /* check suffix is authorized */ 3030 if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { 3031 DISPLAYLEVEL(1, 3032 "zstd: %s: unknown suffix (%s expected). " 3033 "Can't derive the output file name. " 3034 "Specify it with -o dstFileName. 
Ignoring.\n", 3035 srcFileName, suffixListStr); 3036 return NULL; 3037 } 3038 3039 if ((*matchedSuffixPtr)[1] == 't') { 3040 dstSuffix = ".tar"; 3041 dstSuffixLen = strlen(dstSuffix); 3042 } 3043 } 3044 3045 if (outDirName) { 3046 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); 3047 sfnSize = strlen(outDirFilename); 3048 assert(outDirFilename != NULL); 3049 } 3050 3051 if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { 3052 /* allocate enough space to write dstFilename into it */ 3053 free(dstFileNameBuffer); 3054 dfnbCapacity = sfnSize + 20; 3055 dstFileNameBuffer = (char*)malloc(dfnbCapacity); 3056 if (dstFileNameBuffer==NULL) 3057 EXM_THROW(74, "%s : not enough memory for dstFileName", 3058 strerror(errno)); 3059 } 3060 3061 /* return dst name == src name truncated from suffix */ 3062 assert(dstFileNameBuffer != NULL); 3063 dstFileNameEndPos = sfnSize - srcSuffixLen; 3064 if (outDirFilename) { 3065 memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); 3066 free(outDirFilename); 3067 } else { 3068 memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); 3069 } 3070 3071 /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" 3072 * extension on decompression. Also writes terminating null. 
*/ 3073 strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); 3074 return dstFileNameBuffer; 3075 3076 /* note : dstFileNameBuffer memory is not going to be free */ 3077 } 3078 3079 int 3080 FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, 3081 FIO_prefs_t* const prefs, 3082 const char** srcNamesTable, 3083 const char* outMirroredRootDirName, 3084 const char* outDirName, const char* outFileName, 3085 const char* dictFileName) 3086 { 3087 int status; 3088 int error = 0; 3089 dRess_t ress = FIO_createDResources(prefs, dictFileName); 3090 3091 if (outFileName) { 3092 if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { 3093 FIO_freeDResources(ress); 3094 return 1; 3095 } 3096 if (!prefs->testMode) { 3097 FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); 3098 if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); 3099 AIO_WritePool_setFile(ress.writeCtx, dstFile); 3100 } 3101 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { 3102 status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); 3103 if (!status) fCtx->nbFilesProcessed++; 3104 error |= status; 3105 } 3106 if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx))) 3107 EXM_THROW(72, "Write error : %s : cannot properly close output file", 3108 strerror(errno)); 3109 } else { 3110 if (outMirroredRootDirName) 3111 UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); 3112 3113 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ 3114 const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; 3115 const char* dstFileName = NULL; 3116 if (outMirroredRootDirName) { 3117 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); 3118 if (validMirroredDirName) { 3119 dstFileName = FIO_determineDstName(srcFileName, 
validMirroredDirName); 3120 free(validMirroredDirName); 3121 } else { 3122 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName); 3123 } 3124 } else { 3125 dstFileName = FIO_determineDstName(srcFileName, outDirName); 3126 } 3127 if (dstFileName == NULL) { error=1; continue; } 3128 status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); 3129 if (!status) fCtx->nbFilesProcessed++; 3130 error |= status; 3131 } 3132 if (outDirName) 3133 FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); 3134 } 3135 3136 if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { 3137 DISPLAY_PROGRESS("\r%79s\r", ""); 3138 DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n", 3139 fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput); 3140 } 3141 3142 FIO_freeDResources(ress); 3143 return error; 3144 } 3145 3146 /* ************************************************************************** 3147 * .zst file info (--list command) 3148 ***************************************************************************/ 3149 3150 typedef struct { 3151 U64 decompressedSize; 3152 U64 compressedSize; 3153 U64 windowSize; 3154 int numActualFrames; 3155 int numSkippableFrames; 3156 int decompUnavailable; 3157 int usesCheck; 3158 BYTE checksum[4]; 3159 U32 nbFiles; 3160 unsigned dictID; 3161 } fileInfo_t; 3162 3163 typedef enum { 3164 info_success=0, 3165 info_frame_error=1, 3166 info_not_zstd=2, 3167 info_file_error=3, 3168 info_truncated_input=4 3169 } InfoError; 3170 3171 #define ERROR_IF(c,n,...) 
{ \ 3172 if (c) { \ 3173 DISPLAYLEVEL(1, __VA_ARGS__); \ 3174 DISPLAYLEVEL(1, " \n"); \ 3175 return n; \ 3176 } \ 3177 } 3178 3179 static InfoError 3180 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) 3181 { 3182 /* begin analyzing frame */ 3183 for ( ; ; ) { 3184 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 3185 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); 3186 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) { 3187 if ( feof(srcFile) 3188 && (numBytesRead == 0) 3189 && (info->compressedSize > 0) 3190 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { 3191 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile); 3192 unsigned long long file_size = (unsigned long long) info->compressedSize; 3193 ERROR_IF(file_position != file_size, info_truncated_input, 3194 "Error: seeked to position %llu, which is beyond file size of %llu\n", 3195 file_position, 3196 file_size); 3197 break; /* correct end of file => success */ 3198 } 3199 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame"); 3200 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames"); 3201 } 3202 { U32 const magicNumber = MEM_readLE32(headerBuffer); 3203 /* Zstandard frame */ 3204 if (magicNumber == ZSTD_MAGICNUMBER) { 3205 ZSTD_FrameHeader header; 3206 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); 3207 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR 3208 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) { 3209 info->decompUnavailable = 1; 3210 } else { 3211 info->decompressedSize += frameContentSize; 3212 } 3213 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, 3214 info_frame_error, "Error: could not decode frame header"); 3215 if (info->dictID != 0 && info->dictID != header.dictID) { 3216 DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. 
Showing dictID 0 instead"); 3217 info->dictID = 0; 3218 } else { 3219 info->dictID = header.dictID; 3220 } 3221 info->windowSize = header.windowSize; 3222 /* move to the end of the frame header */ 3223 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); 3224 ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size"); 3225 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0, 3226 info_frame_error, "Error: could not move to end of frame header"); 3227 } 3228 3229 /* skip all blocks in the frame */ 3230 { int lastBlock = 0; 3231 do { 3232 BYTE blockHeaderBuffer[3]; 3233 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3, 3234 info_frame_error, "Error while reading block header"); 3235 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); 3236 U32 const blockTypeID = (blockHeader >> 1) & 3; 3237 U32 const isRLE = (blockTypeID == 1); 3238 U32 const isWrongBlock = (blockTypeID == 3); 3239 long const blockSize = isRLE ? 
1 : (long)(blockHeader >> 3); 3240 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type"); 3241 lastBlock = blockHeader & 1; 3242 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0, 3243 info_frame_error, "Error: could not skip to end of block"); 3244 } 3245 } while (lastBlock != 1); 3246 } 3247 3248 /* check if checksum is used */ 3249 { BYTE const frameHeaderDescriptor = headerBuffer[4]; 3250 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; 3251 if (contentChecksumFlag) { 3252 info->usesCheck = 1; 3253 ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4, 3254 info_frame_error, "Error: could not read checksum"); 3255 } } 3256 info->numActualFrames++; 3257 } 3258 /* Skippable frame */ 3259 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 3260 U32 const frameSize = MEM_readLE32(headerBuffer + 4); 3261 long const seek = (long)(8 + frameSize - numBytesRead); 3262 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0, 3263 info_frame_error, "Error: could not find end of skippable frame"); 3264 info->numSkippableFrames++; 3265 } 3266 /* unknown content */ 3267 else { 3268 return info_not_zstd; 3269 } 3270 } /* magic number analysis */ 3271 } /* end analyzing frames */ 3272 return info_success; 3273 } 3274 3275 3276 static InfoError 3277 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) 3278 { 3279 InfoError status; 3280 stat_t srcFileStat; 3281 FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat); 3282 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName); 3283 3284 info->compressedSize = UTIL_getFileSizeStat(&srcFileStat); 3285 status = FIO_analyzeFrames(info, srcFile); 3286 3287 fclose(srcFile); 3288 info->nbFiles = 1; 3289 return status; 3290 } 3291 3292 3293 /** getFileInfo() : 3294 * Reads information from file, stores in *info 3295 * @return : InfoError status 3296 */ 3297 static InfoError 3298 getFileInfo(fileInfo_t* 
info, const char* srcFileName) 3299 { 3300 ERROR_IF(!UTIL_isRegularFile(srcFileName), 3301 info_file_error, "Error : %s is not a file", srcFileName); 3302 return getFileInfo_fileConfirmed(info, srcFileName); 3303 } 3304 3305 3306 static void 3307 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) 3308 { 3309 UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize); 3310 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize); 3311 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize); 3312 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize; 3313 const char* const checkString = (info->usesCheck ? "XXH64" : "None"); 3314 if (displayLevel <= 2) { 3315 if (!info->decompUnavailable) { 3316 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n", 3317 info->numSkippableFrames + info->numActualFrames, 3318 info->numSkippableFrames, 3319 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, 3320 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, 3321 ratio, checkString, inFileName); 3322 } else { 3323 DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n", 3324 info->numSkippableFrames + info->numActualFrames, 3325 info->numSkippableFrames, 3326 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, 3327 checkString, inFileName); 3328 } 3329 } else { 3330 DISPLAYOUT("%s \n", inFileName); 3331 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); 3332 if (info->numSkippableFrames) 3333 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); 3334 DISPLAYOUT("DictID: %u\n", info->dictID); 3335 DISPLAYOUT("Window Size: %.*f%s (%llu B)\n", 3336 window_hrs.precision, window_hrs.value, window_hrs.suffix, 3337 (unsigned long long)info->windowSize); 3338 DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n", 3339 
compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, 3340 (unsigned long long)info->compressedSize); 3341 if (!info->decompUnavailable) { 3342 DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n", 3343 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, 3344 (unsigned long long)info->decompressedSize); 3345 DISPLAYOUT("Ratio: %.4f\n", ratio); 3346 } 3347 3348 if (info->usesCheck && info->numActualFrames == 1) { 3349 DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString, 3350 info->checksum[3], info->checksum[2], 3351 info->checksum[1], info->checksum[0] 3352 ); 3353 } else { 3354 DISPLAYOUT("Check: %s\n", checkString); 3355 } 3356 3357 DISPLAYOUT("\n"); 3358 } 3359 } 3360 3361 static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) 3362 { 3363 fileInfo_t total; 3364 memset(&total, 0, sizeof(total)); 3365 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; 3366 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; 3367 total.compressedSize = fi1.compressedSize + fi2.compressedSize; 3368 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; 3369 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; 3370 total.usesCheck = fi1.usesCheck & fi2.usesCheck; 3371 total.nbFiles = fi1.nbFiles + fi2.nbFiles; 3372 return total; 3373 } 3374 3375 static int 3376 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel) 3377 { 3378 fileInfo_t info; 3379 memset(&info, 0, sizeof(info)); 3380 { InfoError const error = getFileInfo(&info, inFileName); 3381 switch (error) { 3382 case info_frame_error: 3383 /* display error, but provide output */ 3384 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName); 3385 break; 3386 case info_not_zstd: 3387 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName); 3388 if (displayLevel > 2) DISPLAYOUT("\n"); 3389 return 1; 3390 case info_file_error: 3391 /* error occurred while opening the file */ 
3392 if (displayLevel > 2) DISPLAYOUT("\n"); 3393 return 1; 3394 case info_truncated_input: 3395 DISPLAYOUT("File \"%s\" is truncated \n", inFileName); 3396 if (displayLevel > 2) DISPLAYOUT("\n"); 3397 return 1; 3398 case info_success: 3399 default: 3400 break; 3401 } 3402 3403 displayInfo(inFileName, &info, displayLevel); 3404 *total = FIO_addFInfo(*total, info); 3405 assert(error == info_success || error == info_frame_error); 3406 return (int)error; 3407 } 3408 } 3409 3410 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel) 3411 { 3412 /* ensure no specified input is stdin (needs fseek() capability) */ 3413 { unsigned u; 3414 for (u=0; u<numFiles;u++) { 3415 ERROR_IF(!strcmp (filenameTable[u], stdinmark), 3416 1, "zstd: --list does not support reading from standard input"); 3417 } } 3418 3419 if (numFiles == 0) { 3420 if (!UTIL_isConsole(stdin)) { 3421 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); 3422 } 3423 DISPLAYLEVEL(1, "No files given \n"); 3424 return 1; 3425 } 3426 3427 if (displayLevel <= 2) { 3428 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); 3429 } 3430 { int error = 0; 3431 fileInfo_t total; 3432 memset(&total, 0, sizeof(total)); 3433 total.usesCheck = 1; 3434 /* --list each file, and check for any error */ 3435 { unsigned u; 3436 for (u=0; u<numFiles;u++) { 3437 error |= FIO_listFile(&total, filenameTable[u], displayLevel); 3438 } } 3439 if (numFiles > 1 && displayLevel <= 2) { /* display total */ 3440 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize); 3441 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize); 3442 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize; 3443 const char* const checkString = (total.usesCheck ? 
"XXH64" : ""); 3444 DISPLAYOUT("----------------------------------------------------------------- \n"); 3445 if (total.decompUnavailable) { 3446 DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n", 3447 total.numSkippableFrames + total.numActualFrames, 3448 total.numSkippableFrames, 3449 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, 3450 checkString, (unsigned)total.nbFiles); 3451 } else { 3452 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n", 3453 total.numSkippableFrames + total.numActualFrames, 3454 total.numSkippableFrames, 3455 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, 3456 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, 3457 ratio, checkString, (unsigned)total.nbFiles); 3458 } } 3459 return error; 3460 } 3461 } 3462 3463 3464 #endif /* #ifndef ZSTD_NODECOMPRESS */ 3465