1 /* 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /* ************************************* 13 * Compiler Options 14 ***************************************/ 15 #ifdef _MSC_VER /* Visual */ 16 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 17 # pragma warning(disable : 4204) /* non-constant aggregate initializer */ 18 #endif 19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE) 20 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ 21 #endif 22 23 /*-************************************* 24 * Includes 25 ***************************************/ 26 #include "platform.h" /* Large Files support, SET_BINARY_MODE */ 27 #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ 28 #include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ 29 #include <stdlib.h> /* malloc, free */ 30 #include <string.h> /* strcmp, strlen */ 31 #include <assert.h> 32 #include <errno.h> /* errno */ 33 #include <signal.h> 34 #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ 35 36 #if defined (_MSC_VER) 37 # include <sys/stat.h> 38 # include <io.h> 39 #endif 40 41 #include "mem.h" /* U32, U64 */ 42 #include "fileio.h" 43 44 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ 45 #include "zstd.h" 46 #include "zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ 47 48 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) 49 # include <zlib.h> 50 # if !defined(z_const) 51 # define z_const 52 # endif 53 #endif 54 55 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) 56 # include 
<lzma.h> 57 #endif 58 59 #define LZ4_MAGICNUMBER 0x184D2204 60 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) 61 # define LZ4F_ENABLE_OBSOLETE_ENUMS 62 # include <lz4frame.h> 63 # include <lz4.h> 64 #endif 65 66 67 /*-************************************* 68 * Constants 69 ***************************************/ 70 #define KB *(1<<10) 71 #define MB *(1<<20) 72 #define GB *(1U<<30) 73 74 #define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */ 75 #define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ 76 77 #define FNSPACE 30 78 79 80 /*-************************************* 81 * Macros 82 ***************************************/ 83 84 struct FIO_display_prefs_s { 85 int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ 86 U32 noProgress; 87 }; 88 89 static FIO_display_prefs_t g_display_prefs = {2, 0}; 90 91 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 92 #define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) 93 #define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } } 94 95 static const U64 g_refreshRate = SEC_TO_MICRO / 6; 96 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 97 98 #define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) 99 #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } 100 #define DISPLAYUPDATE(l, ...) { \ 101 if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \ 102 if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ 103 DELAY_NEXT_UPDATE(); \ 104 DISPLAY(__VA_ARGS__); \ 105 if (g_display_prefs.displayLevel>=4) fflush(stderr); \ 106 } } } 107 108 #undef MIN /* in case it would be already defined */ 109 #define MIN(a,b) ((a) < (b) ? (a) : (b)) 110 111 112 #define EXM_THROW(error, ...) 
\ 113 { \ 114 DISPLAYLEVEL(1, "zstd: "); \ 115 DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ 116 DISPLAYLEVEL(1, "error %i : ", error); \ 117 DISPLAYLEVEL(1, __VA_ARGS__); \ 118 DISPLAYLEVEL(1, " \n"); \ 119 exit(error); \ 120 } 121 122 #define CHECK_V(v, f) \ 123 v = f; \ 124 if (ZSTD_isError(v)) { \ 125 DISPLAYLEVEL(5, "%s \n", #f); \ 126 EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ 127 } 128 #define CHECK(f) { size_t err; CHECK_V(err, f); } 129 130 131 /*-************************************ 132 * Signal (Ctrl-C trapping) 133 **************************************/ 134 static const char* g_artefact = NULL; 135 static void INThandler(int sig) 136 { 137 assert(sig==SIGINT); (void)sig; 138 #if !defined(_MSC_VER) 139 signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ 140 #endif 141 if (g_artefact) { 142 assert(UTIL_isRegularFile(g_artefact)); 143 remove(g_artefact); 144 } 145 DISPLAY("\n"); 146 exit(2); 147 } 148 static void addHandler(char const* dstFileName) 149 { 150 if (UTIL_isRegularFile(dstFileName)) { 151 g_artefact = dstFileName; 152 signal(SIGINT, INThandler); 153 } else { 154 g_artefact = NULL; 155 } 156 } 157 /* Idempotent */ 158 static void clearHandler(void) 159 { 160 if (g_artefact) signal(SIGINT, SIG_DFL); 161 g_artefact = NULL; 162 } 163 164 165 /*-********************************************************* 166 * Termination signal trapping (Print debug stack trace) 167 ***********************************************************/ 168 #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */ 169 # if (__has_feature(address_sanitizer)) 170 # define BACKTRACE_ENABLE 0 171 # endif /* __has_feature(address_sanitizer) */ 172 #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */ 173 # define BACKTRACE_ENABLE 0 174 #endif 175 176 #if !defined(BACKTRACE_ENABLE) 177 /* automatic detector : backtrace enabled by default on linux+glibc and osx */ 178 
# if (defined(__linux__) && defined(__GLIBC__)) \ 179 || (defined(__APPLE__) && defined(__MACH__)) 180 # define BACKTRACE_ENABLE 1 181 # else 182 # define BACKTRACE_ENABLE 0 183 # endif 184 #endif 185 186 /* note : after this point, BACKTRACE_ENABLE is necessarily defined */ 187 188 189 #if BACKTRACE_ENABLE 190 191 #include <execinfo.h> /* backtrace, backtrace_symbols */ 192 193 #define MAX_STACK_FRAMES 50 194 195 static void ABRThandler(int sig) { 196 const char* name; 197 void* addrlist[MAX_STACK_FRAMES]; 198 char** symbollist; 199 int addrlen, i; 200 201 switch (sig) { 202 case SIGABRT: name = "SIGABRT"; break; 203 case SIGFPE: name = "SIGFPE"; break; 204 case SIGILL: name = "SIGILL"; break; 205 case SIGINT: name = "SIGINT"; break; 206 case SIGSEGV: name = "SIGSEGV"; break; 207 default: name = "UNKNOWN"; 208 } 209 210 DISPLAY("Caught %s signal, printing stack:\n", name); 211 /* Retrieve current stack addresses. */ 212 addrlen = backtrace(addrlist, MAX_STACK_FRAMES); 213 if (addrlen == 0) { 214 DISPLAY("\n"); 215 return; 216 } 217 /* Create readable strings to each frame. */ 218 symbollist = backtrace_symbols(addrlist, addrlen); 219 /* Print the stack trace, excluding calls handling the signal. */ 220 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) { 221 DISPLAY("%s\n", symbollist[i]); 222 } 223 free(symbollist); 224 /* Reset and raise the signal so default handler runs. 
*/ 225 signal(sig, SIG_DFL); 226 raise(sig); 227 } 228 #endif 229 230 void FIO_addAbortHandler() 231 { 232 #if BACKTRACE_ENABLE 233 signal(SIGABRT, ABRThandler); 234 signal(SIGFPE, ABRThandler); 235 signal(SIGILL, ABRThandler); 236 signal(SIGSEGV, ABRThandler); 237 signal(SIGBUS, ABRThandler); 238 #endif 239 } 240 241 242 /*-************************************************************ 243 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW 244 ***************************************************************/ 245 #if defined(_MSC_VER) && _MSC_VER >= 1400 246 # define LONG_SEEK _fseeki64 247 # define LONG_TELL _ftelli64 248 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ 249 # define LONG_SEEK fseeko 250 # define LONG_TELL ftello 251 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) 252 # define LONG_SEEK fseeko64 253 # define LONG_TELL ftello64 254 #elif defined(_WIN32) && !defined(__DJGPP__) 255 # include <windows.h> 256 static int LONG_SEEK(FILE* file, __int64 offset, int origin) { 257 LARGE_INTEGER off; 258 DWORD method; 259 off.QuadPart = offset; 260 if (origin == SEEK_END) 261 method = FILE_END; 262 else if (origin == SEEK_CUR) 263 method = FILE_CURRENT; 264 else 265 method = FILE_BEGIN; 266 267 if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) 268 return 0; 269 else 270 return -1; 271 } 272 #else 273 # define LONG_SEEK fseek 274 # define LONG_TELL ftell 275 #endif 276 277 278 /*-************************************* 279 * Parameters: Typedefs 280 ***************************************/ 281 282 struct FIO_prefs_s { 283 284 /* Algorithm preferences */ 285 FIO_compressionType_t compressionType; 286 U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ 287 int dictIDFlag; 288 int checksumFlag; 289 int blockSize; 290 int overlapLog; 291 U32 adaptiveMode; 292 int 
rsyncable; 293 int minAdaptLevel; 294 int maxAdaptLevel; 295 int ldmFlag; 296 int ldmHashLog; 297 int ldmMinMatch; 298 int ldmBucketSizeLog; 299 int ldmHashRateLog; 300 ZSTD_literalCompressionMode_e literalCompressionMode; 301 302 /* IO preferences */ 303 U32 removeSrcFile; 304 U32 overwrite; 305 306 /* Computation resources preferences */ 307 unsigned memLimit; 308 int nbWorkers; 309 }; 310 311 312 /*-************************************* 313 * Parameters: Initialization 314 ***************************************/ 315 316 #define FIO_OVERLAP_LOG_NOTSET 9999 317 #define FIO_LDM_PARAM_NOTSET 9999 318 319 320 FIO_prefs_t* FIO_createPreferences(void) 321 { 322 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t)); 323 if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); 324 325 ret->compressionType = FIO_zstdCompression; 326 ret->overwrite = 0; 327 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 328 ret->dictIDFlag = 1; 329 ret->checksumFlag = 1; 330 ret->removeSrcFile = 0; 331 ret->memLimit = 0; 332 ret->nbWorkers = 1; 333 ret->blockSize = 0; 334 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET; 335 ret->adaptiveMode = 0; 336 ret->rsyncable = 0; 337 ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ 338 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */ 339 ret->ldmFlag = 0; 340 ret->ldmHashLog = 0; 341 ret->ldmMinMatch = 0; 342 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; 343 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; 344 ret->literalCompressionMode = ZSTD_lcm_auto; 345 return ret; 346 } 347 348 void FIO_freePreferences(FIO_prefs_t* const prefs) 349 { 350 free(prefs); 351 } 352 353 354 /*-************************************* 355 * Parameters: Display Options 356 ***************************************/ 357 358 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; } 359 360 void FIO_setNoProgress(unsigned 
noProgress) { g_display_prefs.noProgress = noProgress; } 361 362 363 /*-************************************* 364 * Parameters: Setters 365 ***************************************/ 366 367 void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } 368 369 void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } 370 371 void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; } 372 373 void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } 374 375 void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } 376 377 void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); } 378 379 void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } 380 381 void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { 382 #ifndef ZSTD_MULTITHREAD 383 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); 384 #endif 385 prefs->nbWorkers = nbWorkers; 386 } 387 388 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { 389 if (blockSize && prefs->nbWorkers==0) 390 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); 391 prefs->blockSize = blockSize; 392 } 393 394 void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ 395 if (overlapLog && prefs->nbWorkers==0) 396 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); 397 prefs->overlapLog = overlapLog; 398 } 399 400 void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) { 401 if ((adapt>0) && (prefs->nbWorkers==0)) 402 EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); 403 prefs->adaptiveMode = adapt; 404 } 405 406 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { 407 if 
((rsyncable>0) && (prefs->nbWorkers==0)) 408 EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); 409 prefs->rsyncable = rsyncable; 410 } 411 412 void FIO_setLiteralCompressionMode( 413 FIO_prefs_t* const prefs, 414 ZSTD_literalCompressionMode_e mode) { 415 prefs->literalCompressionMode = mode; 416 } 417 418 void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) 419 { 420 #ifndef ZSTD_NOCOMPRESS 421 assert(minCLevel >= ZSTD_minCLevel()); 422 #endif 423 prefs->minAdaptLevel = minCLevel; 424 } 425 426 void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) 427 { 428 prefs->maxAdaptLevel = maxCLevel; 429 } 430 431 void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) { 432 prefs->ldmFlag = (ldmFlag>0); 433 } 434 435 void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) { 436 prefs->ldmHashLog = ldmHashLog; 437 } 438 439 void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) { 440 prefs->ldmMinMatch = ldmMinMatch; 441 } 442 443 void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) { 444 prefs->ldmBucketSizeLog = ldmBucketSizeLog; 445 } 446 447 448 void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) { 449 prefs->ldmHashRateLog = ldmHashRateLog; 450 } 451 452 453 /*-************************************* 454 * Functions 455 ***************************************/ 456 /** FIO_remove() : 457 * @result : Unlink `fileName`, even if it's read-only */ 458 static int FIO_remove(const char* path) 459 { 460 if (!UTIL_isRegularFile(path)) { 461 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s \n", path); 462 return 0; 463 } 464 #if defined(_WIN32) || defined(WIN32) 465 /* windows doesn't allow remove read-only files, 466 * so try to make it writable first */ 467 chmod(path, _S_IWRITE); 468 #endif 469 return remove(path); 470 } 471 472 /** FIO_openSrcFile() : 473 * condition : `srcFileName` must be non-NULL. 
474 * @result : FILE* to `srcFileName`, or NULL if it fails */ 475 static FILE* FIO_openSrcFile(const char* srcFileName) 476 { 477 assert(srcFileName != NULL); 478 if (!strcmp (srcFileName, stdinmark)) { 479 DISPLAYLEVEL(4,"Using stdin for input \n"); 480 SET_BINARY_MODE(stdin); 481 return stdin; 482 } 483 484 if (!UTIL_fileExist(srcFileName)) { 485 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", 486 srcFileName, strerror(errno)); 487 return NULL; 488 } 489 490 if (!UTIL_isRegularFile(srcFileName)) { 491 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", 492 srcFileName); 493 return NULL; 494 } 495 496 { FILE* const f = fopen(srcFileName, "rb"); 497 if (f == NULL) 498 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 499 return f; 500 } 501 } 502 503 /** FIO_openDstFile() : 504 * condition : `dstFileName` must be non-NULL. 505 * @result : FILE* to `dstFileName`, or NULL if it fails */ 506 static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName, const char* dstFileName) 507 { 508 assert(dstFileName != NULL); 509 if (!strcmp (dstFileName, stdoutmark)) { 510 DISPLAYLEVEL(4,"Using stdout for output \n"); 511 SET_BINARY_MODE(stdout); 512 if (prefs->sparseFileSupport == 1) { 513 prefs->sparseFileSupport = 0; 514 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); 515 } 516 return stdout; 517 } 518 519 /* ensure dst is not the same as src */ 520 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { 521 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); 522 return NULL; 523 } 524 525 if (prefs->sparseFileSupport == 1) { 526 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT; 527 } 528 529 if (UTIL_isRegularFile(dstFileName)) { 530 /* Check if destination file already exists */ 531 FILE* const fCheck = fopen( dstFileName, "rb" ); 532 if (!strcmp(dstFileName, nulmark)) { 533 EXM_THROW(40, "%s is unexpectedly 
categorized as a regular file", 534 dstFileName); 535 } 536 if (fCheck != NULL) { /* dst file exists, authorization prompt */ 537 fclose(fCheck); 538 if (!prefs->overwrite) { 539 if (g_display_prefs.displayLevel <= 1) { 540 /* No interaction possible */ 541 DISPLAY("zstd: %s already exists; not overwritten \n", 542 dstFileName); 543 return NULL; 544 } 545 DISPLAY("zstd: %s already exists; overwrite (y/N) ? ", 546 dstFileName); 547 { int ch = getchar(); 548 if ((ch!='Y') && (ch!='y')) { 549 DISPLAY(" not overwritten \n"); 550 return NULL; 551 } 552 /* flush rest of input line */ 553 while ((ch!=EOF) && (ch!='\n')) ch = getchar(); 554 } } 555 /* need to unlink */ 556 FIO_remove(dstFileName); 557 } } 558 559 { FILE* const f = fopen( dstFileName, "wb" ); 560 if (f == NULL) 561 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); 562 return f; 563 } 564 } 565 566 567 /*! FIO_createDictBuffer() : 568 * creates a buffer, pointed by `*bufferPtr`, 569 * loads `filename` content into it, up to DICTSIZE_MAX bytes. 
570 * @return : loaded size 571 * if fileName==NULL, returns 0 and a NULL pointer 572 */ 573 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName) 574 { 575 FILE* fileHandle; 576 U64 fileSize; 577 578 assert(bufferPtr != NULL); 579 *bufferPtr = NULL; 580 if (fileName == NULL) return 0; 581 582 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 583 fileHandle = fopen(fileName, "rb"); 584 if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno)); 585 586 fileSize = UTIL_getFileSize(fileName); 587 if (fileSize > DICTSIZE_MAX) { 588 EXM_THROW(32, "Dictionary file %s is too large (> %u MB)", 589 fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */ 590 } 591 *bufferPtr = malloc((size_t)fileSize); 592 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); 593 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); 594 if (readSize != fileSize) 595 EXM_THROW(35, "Error reading dictionary file %s : %s", 596 fileName, strerror(errno)); 597 } 598 fclose(fileHandle); 599 return (size_t)fileSize; 600 } 601 602 #ifndef ZSTD_NOCOMPRESS 603 604 /* ********************************************************************** 605 * Compression 606 ************************************************************************/ 607 typedef struct { 608 FILE* srcFile; 609 FILE* dstFile; 610 void* srcBuffer; 611 size_t srcBufferSize; 612 void* dstBuffer; 613 size_t dstBufferSize; 614 const char* dictFileName; 615 ZSTD_CStream* cctx; 616 } cRess_t; 617 618 static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, 619 const char* dictFileName, int cLevel, 620 U64 srcSize, 621 ZSTD_compressionParameters comprParams) { 622 cRess_t ress; 623 memset(&ress, 0, sizeof(ress)); 624 625 DISPLAYLEVEL(6, "FIO_createCResources \n"); 626 ress.cctx = ZSTD_createCCtx(); 627 if (ress.cctx == NULL) 628 EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", 629 strerror(errno)); 630 ress.srcBufferSize = ZSTD_CStreamInSize(); 631 
ress.srcBuffer = malloc(ress.srcBufferSize); 632 ress.dstBufferSize = ZSTD_CStreamOutSize(); 633 ress.dstBuffer = malloc(ress.dstBufferSize); 634 if (!ress.srcBuffer || !ress.dstBuffer) 635 EXM_THROW(31, "allocation error : not enough memory"); 636 637 /* Advanced parameters, including dictionary */ 638 { void* dictBuffer; 639 size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */ 640 if (dictFileName && (dictBuffer==NULL)) 641 EXM_THROW(32, "allocation error : can't create dictBuffer"); 642 ress.dictFileName = dictFileName; 643 644 if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog) 645 comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT; 646 647 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */ 648 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) ); 649 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) ); 650 /* compression level */ 651 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); 652 /* long distance matching */ 653 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); 654 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); 655 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) ); 656 if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { 657 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) ); 658 } 659 if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) { 660 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) ); 661 } 662 /* compression parameters */ 663 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); 664 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, 
(int)comprParams.chainLog) ); 665 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) ); 666 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); 667 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); 668 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); 669 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); 670 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); 671 /* multi-threading */ 672 #ifdef ZSTD_MULTITHREAD 673 DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); 674 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) ); 675 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) ); 676 if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) { 677 DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog); 678 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) ); 679 } 680 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); 681 #endif 682 /* dictionary */ 683 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ 684 CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); 685 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ 686 687 free(dictBuffer); 688 } 689 690 return ress; 691 } 692 693 static void FIO_freeCResources(cRess_t ress) 694 { 695 free(ress.srcBuffer); 696 free(ress.dstBuffer); 697 ZSTD_freeCStream(ress.cctx); /* never fails */ 698 } 699 700 701 #ifdef ZSTD_GZCOMPRESS 702 static unsigned long long 703 FIO_compressGzFrame(cRess_t* ress, 704 const char* srcFileName, U64 const srcFileSize, 705 int compressionLevel, U64* readsize) 706 { 707 unsigned long long 
inFileSize = 0, outFileSize = 0; 708 z_stream strm; 709 int ret; 710 711 if (compressionLevel > Z_BEST_COMPRESSION) 712 compressionLevel = Z_BEST_COMPRESSION; 713 714 strm.zalloc = Z_NULL; 715 strm.zfree = Z_NULL; 716 strm.opaque = Z_NULL; 717 718 ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, 719 15 /* maxWindowLogSize */ + 16 /* gzip only */, 720 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ 721 if (ret != Z_OK) 722 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); 723 724 strm.next_in = 0; 725 strm.avail_in = 0; 726 strm.next_out = (Bytef*)ress->dstBuffer; 727 strm.avail_out = (uInt)ress->dstBufferSize; 728 729 while (1) { 730 if (strm.avail_in == 0) { 731 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 732 if (inSize == 0) break; 733 inFileSize += inSize; 734 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 735 strm.avail_in = (uInt)inSize; 736 } 737 ret = deflate(&strm, Z_NO_FLUSH); 738 if (ret != Z_OK) 739 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); 740 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 741 if (decompBytes) { 742 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) 743 EXM_THROW(73, "Write error : cannot write to output file"); 744 outFileSize += decompBytes; 745 strm.next_out = (Bytef*)ress->dstBuffer; 746 strm.avail_out = (uInt)ress->dstBufferSize; 747 } 748 } 749 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 750 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 751 (unsigned)(inFileSize>>20), 752 (double)outFileSize/inFileSize*100) 753 else 754 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 755 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 756 (double)outFileSize/inFileSize*100); 757 } 758 759 while (1) { 760 ret = deflate(&strm, Z_FINISH); 761 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 762 if (decompBytes) { 763 if (fwrite(ress->dstBuffer, 1, decompBytes, 
ress->dstFile) != decompBytes) 764 EXM_THROW(75, "Write error : %s", strerror(errno)); 765 outFileSize += decompBytes; 766 strm.next_out = (Bytef*)ress->dstBuffer; 767 strm.avail_out = (uInt)ress->dstBufferSize; 768 } } 769 if (ret == Z_STREAM_END) break; 770 if (ret != Z_BUF_ERROR) 771 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); 772 } 773 774 ret = deflateEnd(&strm); 775 if (ret != Z_OK) 776 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); 777 *readsize = inFileSize; 778 779 return outFileSize; 780 } 781 #endif 782 783 784 #ifdef ZSTD_LZMACOMPRESS 785 static unsigned long long 786 FIO_compressLzmaFrame(cRess_t* ress, 787 const char* srcFileName, U64 const srcFileSize, 788 int compressionLevel, U64* readsize, int plain_lzma) 789 { 790 unsigned long long inFileSize = 0, outFileSize = 0; 791 lzma_stream strm = LZMA_STREAM_INIT; 792 lzma_action action = LZMA_RUN; 793 lzma_ret ret; 794 795 if (compressionLevel < 0) compressionLevel = 0; 796 if (compressionLevel > 9) compressionLevel = 9; 797 798 if (plain_lzma) { 799 lzma_options_lzma opt_lzma; 800 if (lzma_lzma_preset(&opt_lzma, compressionLevel)) 801 EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName); 802 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ 803 if (ret != LZMA_OK) 804 EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); 805 } else { 806 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ 807 if (ret != LZMA_OK) 808 EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); 809 } 810 811 strm.next_in = 0; 812 strm.avail_in = 0; 813 strm.next_out = (BYTE*)ress->dstBuffer; 814 strm.avail_out = ress->dstBufferSize; 815 816 while (1) { 817 if (strm.avail_in == 0) { 818 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 819 if (inSize == 0) action = LZMA_FINISH; 820 inFileSize += inSize; 821 strm.next_in = (BYTE const*)ress->srcBuffer; 822 strm.avail_in = 
inSize; 823 } 824 825 ret = lzma_code(&strm, action); 826 827 if (ret != LZMA_OK && ret != LZMA_STREAM_END) 828 EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); 829 { size_t const compBytes = ress->dstBufferSize - strm.avail_out; 830 if (compBytes) { 831 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) 832 EXM_THROW(73, "Write error : %s", strerror(errno)); 833 outFileSize += compBytes; 834 strm.next_out = (BYTE*)ress->dstBuffer; 835 strm.avail_out = ress->dstBufferSize; 836 } } 837 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 838 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 839 (unsigned)(inFileSize>>20), 840 (double)outFileSize/inFileSize*100) 841 else 842 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 843 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 844 (double)outFileSize/inFileSize*100); 845 if (ret == LZMA_STREAM_END) break; 846 } 847 848 lzma_end(&strm); 849 *readsize = inFileSize; 850 851 return outFileSize; 852 } 853 #endif 854 855 #ifdef ZSTD_LZ4COMPRESS 856 857 #if LZ4_VERSION_NUMBER <= 10600 858 #define LZ4F_blockLinked blockLinked 859 #define LZ4F_max64KB max64KB 860 #endif 861 862 static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } 863 864 static unsigned long long 865 FIO_compressLz4Frame(cRess_t* ress, 866 const char* srcFileName, U64 const srcFileSize, 867 int compressionLevel, int checksumFlag, 868 U64* readsize) 869 { 870 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB); 871 unsigned long long inFileSize = 0, outFileSize = 0; 872 873 LZ4F_preferences_t prefs; 874 LZ4F_compressionContext_t ctx; 875 876 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); 877 if (LZ4F_isError(errorCode)) 878 EXM_THROW(31, "zstd: failed to create lz4 compression context"); 879 880 memset(&prefs, 0, sizeof(prefs)); 881 882 assert(blockSize <= ress->srcBufferSize); 883 884 prefs.autoFlush = 1; 885 
prefs.compressionLevel = compressionLevel; 886 prefs.frameInfo.blockMode = LZ4F_blockLinked; 887 prefs.frameInfo.blockSizeID = LZ4F_max64KB; 888 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag; 889 #if LZ4_VERSION_NUMBER >= 10600 890 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; 891 #endif 892 assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); 893 894 { 895 size_t readSize; 896 size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); 897 if (LZ4F_isError(headerSize)) 898 EXM_THROW(33, "File header generation failed : %s", 899 LZ4F_getErrorName(headerSize)); 900 if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) 901 EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno)); 902 outFileSize += headerSize; 903 904 /* Read first block */ 905 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); 906 inFileSize += readSize; 907 908 /* Main Loop */ 909 while (readSize>0) { 910 size_t const outSize = LZ4F_compressUpdate(ctx, 911 ress->dstBuffer, ress->dstBufferSize, 912 ress->srcBuffer, readSize, NULL); 913 if (LZ4F_isError(outSize)) 914 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", 915 srcFileName, LZ4F_getErrorName(outSize)); 916 outFileSize += outSize; 917 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { 918 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 919 (unsigned)(inFileSize>>20), 920 (double)outFileSize/inFileSize*100) 921 } else { 922 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 923 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), 924 (double)outFileSize/inFileSize*100); 925 } 926 927 /* Write Block */ 928 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); 929 if (sizeCheck != outSize) 930 EXM_THROW(36, "Write error : %s", strerror(errno)); 931 } 932 933 /* Read next block */ 934 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, 
ress->srcFile); 935 inFileSize += readSize; 936 } 937 if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); 938 939 /* End of Stream mark */ 940 headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); 941 if (LZ4F_isError(headerSize)) 942 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", 943 srcFileName, LZ4F_getErrorName(headerSize)); 944 945 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); 946 if (sizeCheck != headerSize) 947 EXM_THROW(39, "Write error : %s (cannot write end of stream)", 948 strerror(errno)); 949 } 950 outFileSize += headerSize; 951 } 952 953 *readsize = inFileSize; 954 LZ4F_freeCompressionContext(ctx); 955 956 return outFileSize; 957 } 958 #endif 959 960 961 static unsigned long long 962 FIO_compressZstdFrame(FIO_prefs_t* const prefs, 963 const cRess_t* ressPtr, 964 const char* srcFileName, U64 fileSize, 965 int compressionLevel, U64* readsize) 966 { 967 cRess_t const ress = *ressPtr; 968 FILE* const srcFile = ress.srcFile; 969 FILE* const dstFile = ress.dstFile; 970 U64 compressedfilesize = 0; 971 ZSTD_EndDirective directive = ZSTD_e_continue; 972 973 /* stats */ 974 ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 }; 975 ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 }; 976 typedef enum { noChange, slower, faster } speedChange_e; 977 speedChange_e speedChange = noChange; 978 unsigned flushWaiting = 0; 979 unsigned inputPresented = 0; 980 unsigned inputBlocked = 0; 981 unsigned lastJobID = 0; 982 983 DISPLAYLEVEL(6, "compression using zstd format \n"); 984 985 /* init */ 986 if (fileSize != UTIL_FILESIZE_UNKNOWN) { 987 CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); 988 } 989 (void)srcFileName; 990 991 /* Main compression loop */ 992 do { 993 size_t stillToFlush; 994 /* Fill input Buffer */ 995 size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); 996 ZSTD_inBuffer inBuff = { 
/* FIO_compressZstdFrame() :
 * compresses the content of ressPtr->srcFile into ressPtr->dstFile, as a single zstd frame,
 * using the streaming context ressPtr->cctx.
 * fileSize : size of the source when known (then pledged to the context),
 *            or UTIL_FILESIZE_UNKNOWN.
 * compressionLevel : level shown in progress notifications;
 *            it is also the starting point of adjustments when prefs->adaptiveMode is set.
 * *readsize : incremented by the amount of bytes read from the source (not reset here).
 * srcFileName is currently unused.
 * @return : amount of compressed bytes written into dstFile.
 * Read, write and size-mismatch failures are reported through EXM_THROW().
 */
static unsigned long long
FIO_compressZstdFrame(FIO_prefs_t* const prefs,
                      const cRess_t* ressPtr,
                      const char* srcFileName, U64 fileSize,
                      int compressionLevel, U64* readsize)
{
    cRess_t const ress = *ressPtr;
    FILE* const srcFile = ress.srcFile;
    FILE* const dstFile = ress.dstFile;
    U64 compressedfilesize = 0;
    ZSTD_EndDirective directive = ZSTD_e_continue;

    /* stats */
    ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };       /* snapshot taken at each output-speed check */
    ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };   /* snapshot taken at each input-speed check */
    typedef enum { noChange, slower, faster } speedChange_e;   /* slower => compression level raised ; faster => level lowered */
    speedChange_e speedChange = noChange;
    unsigned flushWaiting = 0;     /* set when ZSTD_toFlushNow() reported 0 before a compression call */
    unsigned inputPresented = 0;   /* nb of compression calls since last input-speed check */
    unsigned inputBlocked = 0;     /* nb of those calls which consumed no input */
    unsigned lastJobID = 0;        /* job ID observed at last course correction */

    DISPLAYLEVEL(6, "compression using zstd format \n");

    /* init */
    if (fileSize != UTIL_FILESIZE_UNKNOWN) {
        CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
    }
    (void)srcFileName;

    /* Main compression loop : one iteration per fread() of the source */
    do {
        size_t stillToFlush;
        /* Fill input Buffer */
        size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
        ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
        DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
        *readsize += inSize;

        /* end the frame on empty read, or as soon as the announced size has been read */
        if ((inSize == 0) || (*readsize == fileSize))
            directive = ZSTD_e_end;

        stillToFlush = 1;   /* forces at least one compression call when ending the frame */
        while ((inBuff.pos != inBuff.size)   /* input buffer must be entirely ingested */
            || (directive == ZSTD_e_end && stillToFlush != 0) ) {

            size_t const oldIPos = inBuff.pos;
            ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
            size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);   /* sampled before the compression call */
            CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));

            /* count stats */
            inputPresented++;
            if (oldIPos == inBuff.pos) inputBlocked++;  /* input buffer is full and can't take any more : input speed is faster than consumption rate */
            if (!toFlushNow) flushWaiting = 1;   /* nothing was pending for flush before this call */

            /* Write compressed stream */
            DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
                            (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
            if (outBuff.pos) {
                size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
                if (sizeCheck != outBuff.pos)
                    EXM_THROW(25, "Write error : %s (cannot write compressed block)",
                                    strerror(errno));
                compressedfilesize += outBuff.pos;
            }

            /* display notification; and adapt compression level */
            if (READY_FOR_UPDATE()) {
                ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
                double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;

                /* display progress notifications */
                if (g_display_prefs.displayLevel >= 3) {
                    DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
                                compressionLevel,
                                (unsigned)((zfp.ingested - zfp.consumed) >> 20),
                                (unsigned)(zfp.consumed >> 20),
                                (unsigned)(zfp.produced >> 20),
                                cShare );
                } else {   /* summarized notifications if == 2; */
                    DISPLAYLEVEL(2, "\rRead : %u ", (unsigned)(zfp.consumed >> 20));
                    if (fileSize != UTIL_FILESIZE_UNKNOWN)
                        DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20));
                    DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare);
                    DELAY_NEXT_UPDATE();   /* DISPLAYLEVEL() does not reset the refresh clock, so do it explicitly */
                }

                /* adaptive mode : statistics measurement and speed correction */
                if (prefs->adaptiveMode) {

                    /* check output speed */
                    if (zfp.currentJobID > 1) {  /* only possible if nbWorkers >= 1 */

                        unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
                        unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
                        assert(zfp.produced >= previous_zfp_update.produced);
                        assert(prefs->nbWorkers >= 1);

                        /* test if compression is blocked
                         * either because output is slow and all buffers are full
                         * or because input is slow and no job can start while waiting for at least one buffer to be filled.
                         * note : exclude starting part, since currentJobID > 1 */
                        if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
                          && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
                          ) {
                            DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
                            speedChange = slower;
                        }

                        previous_zfp_update = zfp;

                        if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
                          && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
                          ) {
                            DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
                            speedChange = slower;
                        }
                        flushWaiting = 0;   /* restart observation for next update period */
                    }

                    /* course correct only if there is at least one new job completed */
                    if (zfp.currentJobID > lastJobID) {
                        DISPLAYLEVEL(6, "compression level adaptation check \n")

                        /* check input speed */
                        if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) {   /* warm up period, to fill all workers */
                            if (inputBlocked <= 0) {   /* unsigned counter : equivalent to == 0 */
                                DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
                                speedChange = slower;
                            } else if (speedChange == noChange) {
                                unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
                                unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
                                unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
                                unsigned long long newlyFlushed  = zfp.flushed  - previous_zfp_correction.flushed;
                                previous_zfp_correction = zfp;
                                assert(inputPresented > 0);
                                DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
                                                inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
                                                (unsigned)newlyIngested, (unsigned)newlyConsumed,
                                                (unsigned)newlyFlushed, (unsigned)newlyProduced);
                                if ( (inputBlocked > inputPresented / 8)     /* input is waiting often, because input buffers is full : compression or output too slow */
                                  && (newlyFlushed * 33 / 32 > newlyProduced)  /* flush everything that is produced */
                                  && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
                                ) {
                                    DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
                                                    newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
                                    speedChange = faster;
                                }
                            }
                            inputBlocked = 0;
                            inputPresented = 0;
                        }

                        /* apply decision : "slower" raises the level, bounded by ZSTD_maxCLevel() and prefs->maxAdaptLevel */
                        if (speedChange == slower) {
                            DISPLAYLEVEL(6, "slower speed , higher compression \n")
                            compressionLevel ++;
                            if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
                            if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
                            compressionLevel += (compressionLevel == 0);   /* skip 0 */
                            ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
                        }
                        /* "faster" lowers the level, bounded by prefs->minAdaptLevel */
                        if (speedChange == faster) {
                            DISPLAYLEVEL(6, "faster speed , lighter compression \n")
                            compressionLevel --;
                            if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
                            compressionLevel -= (compressionLevel == 0);   /* skip 0 */
                            ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
                        }
                        speedChange = noChange;

                        lastJobID = zfp.currentJobID;
                    }  /* if (zfp.currentJobID > lastJobID) */
                }  /* if (prefs->adaptiveMode) */
            }  /* if (READY_FOR_UPDATE()) */
        }  /* while ((inBuff.pos != inBuff.size) */
    } while (directive != ZSTD_e_end);

    if (ferror(srcFile)) {
        EXM_THROW(26, "Read error : I/O error");
    }
    /* when the size was announced, the amount read must match it exactly */
    if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
        EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
                (unsigned long long)*readsize, (unsigned long long)fileSize);
    }

    return compressedfilesize;
}
if (ferror(srcFile)) { 1140 EXM_THROW(26, "Read error : I/O error"); 1141 } 1142 if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { 1143 EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", 1144 (unsigned long long)*readsize, (unsigned long long)fileSize); 1145 } 1146 1147 return compressedfilesize; 1148 } 1149 1150 /*! FIO_compressFilename_internal() : 1151 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. 1152 * @return : 0 : compression completed correctly, 1153 * 1 : missing or pb opening srcFileName 1154 */ 1155 static int 1156 FIO_compressFilename_internal(FIO_prefs_t* const prefs, 1157 cRess_t ress, 1158 const char* dstFileName, const char* srcFileName, 1159 int compressionLevel) 1160 { 1161 U64 readsize = 0; 1162 U64 compressedfilesize = 0; 1163 U64 const fileSize = UTIL_getFileSize(srcFileName); 1164 DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize); 1165 1166 /* compression format selection */ 1167 switch (prefs->compressionType) { 1168 default: 1169 case FIO_zstdCompression: 1170 compressedfilesize = FIO_compressZstdFrame(prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); 1171 break; 1172 1173 case FIO_gzipCompression: 1174 #ifdef ZSTD_GZCOMPRESS 1175 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 1176 #else 1177 (void)compressionLevel; 1178 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", 1179 srcFileName); 1180 #endif 1181 break; 1182 1183 case FIO_xzCompression: 1184 case FIO_lzmaCompression: 1185 #ifdef ZSTD_LZMACOMPRESS 1186 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression); 1187 #else 1188 (void)compressionLevel; 1189 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", 1190 
srcFileName); 1191 #endif 1192 break; 1193 1194 case FIO_lz4Compression: 1195 #ifdef ZSTD_LZ4COMPRESS 1196 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize); 1197 #else 1198 (void)compressionLevel; 1199 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", 1200 srcFileName); 1201 #endif 1202 break; 1203 } 1204 1205 /* Status */ 1206 DISPLAYLEVEL(2, "\r%79s\r", ""); 1207 DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", 1208 srcFileName, 1209 (double)compressedfilesize / (readsize+(!readsize)/*avoid div by zero*/) * 100, 1210 (unsigned long long)readsize, (unsigned long long) compressedfilesize, 1211 dstFileName); 1212 1213 return 0; 1214 } 1215 1216 1217 /*! FIO_compressFilename_dstFile() : 1218 * open dstFileName, or pass-through if ress.dstFile != NULL, 1219 * then start compression with FIO_compressFilename_internal(). 1220 * Manages source removal (--rm) and file permissions transfer. 1221 * note : ress.srcFile must be != NULL, 1222 * so reach this function through FIO_compressFilename_srcFile(). 1223 * @return : 0 : compression completed correctly, 1224 * 1 : pb 1225 */ 1226 static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, 1227 cRess_t ress, 1228 const char* dstFileName, 1229 const char* srcFileName, 1230 int compressionLevel) 1231 { 1232 int closeDstFile = 0; 1233 int result; 1234 stat_t statbuf; 1235 int transfer_permissions = 0; 1236 1237 assert(ress.srcFile != NULL); 1238 1239 if (ress.dstFile == NULL) { 1240 closeDstFile = 1; 1241 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName); 1242 ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName); 1243 if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ 1244 /* Must only be added after FIO_openDstFile() succeeds. 
1245 * Otherwise we may delete the destination file if it already exists, 1246 * and the user presses Ctrl-C when asked if they wish to overwrite. 1247 */ 1248 addHandler(dstFileName); 1249 1250 if ( strcmp (srcFileName, stdinmark) 1251 && UTIL_getFileStat(srcFileName, &statbuf)) 1252 transfer_permissions = 1; 1253 } 1254 1255 result = FIO_compressFilename_internal(prefs, ress, dstFileName, srcFileName, compressionLevel); 1256 1257 if (closeDstFile) { 1258 FILE* const dstFile = ress.dstFile; 1259 ress.dstFile = NULL; 1260 1261 clearHandler(); 1262 1263 if (fclose(dstFile)) { /* error closing dstFile */ 1264 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 1265 result=1; 1266 } 1267 if ( (result != 0) /* operation failure */ 1268 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */ 1269 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ 1270 ) { 1271 FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ 1272 } else if ( strcmp(dstFileName, stdoutmark) 1273 && strcmp(dstFileName, nulmark) 1274 && transfer_permissions) { 1275 UTIL_setFileStat(dstFileName, &statbuf); 1276 } 1277 } 1278 1279 return result; 1280 } 1281 1282 1283 /*! 
FIO_compressFilename_srcFile() : 1284 * @return : 0 : compression completed correctly, 1285 * 1 : missing or pb opening srcFileName 1286 */ 1287 static int 1288 FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, 1289 cRess_t ress, 1290 const char* dstFileName, 1291 const char* srcFileName, 1292 int compressionLevel) 1293 { 1294 int result; 1295 1296 /* ensure src is not a directory */ 1297 if (UTIL_isDirectory(srcFileName)) { 1298 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 1299 return 1; 1300 } 1301 1302 /* ensure src is not the same as dict (if present) */ 1303 if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) { 1304 DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); 1305 return 1; 1306 } 1307 1308 ress.srcFile = FIO_openSrcFile(srcFileName); 1309 if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ 1310 1311 result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel); 1312 1313 fclose(ress.srcFile); 1314 ress.srcFile = NULL; 1315 if ( prefs->removeSrcFile /* --rm */ 1316 && result == 0 /* success */ 1317 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ 1318 ) { 1319 /* We must clear the handler, since after this point calling it would 1320 * delete both the source and destination files. 1321 */ 1322 clearHandler(); 1323 if (FIO_remove(srcFileName)) 1324 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); 1325 } 1326 return result; 1327 } 1328 1329 1330 int FIO_compressFilename(FIO_prefs_t* const prefs, 1331 const char* dstFileName, const char* srcFileName, 1332 const char* dictFileName, int compressionLevel, 1333 ZSTD_compressionParameters comprParams) 1334 { 1335 clock_t const start = clock(); 1336 U64 const fileSize = UTIL_getFileSize(srcFileName); 1337 U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 
/* FIO_determineCompressedName() :
 * builds the destination name of a compressed file : srcFileName followed by suffix.
 * The name is stored in a function-owned buffer, reused (and possibly reallocated)
 * by each call : the returned pointer is only meaningful until the next call,
 * and this function cannot be multi-threaded.
 * @return a pointer to the destination name.
 * This function never returns an error (allocation failure goes through EXM_THROW())
 */
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* suffix)
{
    static char* s_nameBuffer = NULL;
    static size_t s_nameCapacity = 0;

    size_t const srcLength = strlen(srcFileName);
    size_t const suffixLength = strlen(suffix);
    size_t const required = srcLength + suffixLength + 1;

    if (!(s_nameCapacity > required)) {
        /* buffer too small : replace it with a larger one (previous content is not needed) */
        free(s_nameBuffer);
        s_nameCapacity = srcLength + suffixLength + 30;
        s_nameBuffer = (char*)malloc(s_nameCapacity);
        if (s_nameBuffer == NULL) {
            EXM_THROW(30, "zstd: %s", strerror(errno));
        }
    }
    assert(s_nameBuffer != NULL);

    memcpy(s_nameBuffer, srcFileName, srcLength);
    /* copy suffix together with its terminating null */
    memcpy(s_nameBuffer + srcLength, suffix, suffixLength + 1);

    return s_nameBuffer;
}
1384 */ 1385 int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, 1386 const char** inFileNamesTable, unsigned nbFiles, 1387 const char* outFileName, const char* suffix, 1388 const char* dictFileName, int compressionLevel, 1389 ZSTD_compressionParameters comprParams) 1390 { 1391 int error = 0; 1392 U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); 1393 U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; 1394 U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ; 1395 cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams); 1396 1397 /* init */ 1398 assert(outFileName != NULL || suffix != NULL); 1399 1400 if (outFileName != NULL) { /* output into a single destination (stdout typically) */ 1401 ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); 1402 if (ress.dstFile == NULL) { /* could not open outFileName */ 1403 error = 1; 1404 } else { 1405 unsigned u; 1406 for (u=0; u<nbFiles; u++) 1407 error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel); 1408 if (fclose(ress.dstFile)) 1409 EXM_THROW(29, "Write error (%s) : cannot properly close %s", 1410 strerror(errno), outFileName); 1411 ress.dstFile = NULL; 1412 } 1413 } else { 1414 unsigned u; 1415 for (u=0; u<nbFiles; u++) { 1416 const char* const srcFileName = inFileNamesTable[u]; 1417 const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix); /* cannot fail */ 1418 error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); 1419 } } 1420 1421 FIO_freeCResources(ress); 1422 return error; 1423 } 1424 1425 #endif /* #ifndef ZSTD_NOCOMPRESS */ 1426 1427 1428 1429 #ifndef ZSTD_NODECOMPRESS 1430 1431 /* ************************************************************************** 1432 * Decompression 1433 
***************************************************************************/ 1434 typedef struct { 1435 void* srcBuffer; 1436 size_t srcBufferSize; 1437 size_t srcBufferLoaded; 1438 void* dstBuffer; 1439 size_t dstBufferSize; 1440 ZSTD_DStream* dctx; 1441 FILE* dstFile; 1442 } dRess_t; 1443 1444 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) 1445 { 1446 dRess_t ress; 1447 memset(&ress, 0, sizeof(ress)); 1448 1449 /* Allocation */ 1450 ress.dctx = ZSTD_createDStream(); 1451 if (ress.dctx==NULL) 1452 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); 1453 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); 1454 ress.srcBufferSize = ZSTD_DStreamInSize(); 1455 ress.srcBuffer = malloc(ress.srcBufferSize); 1456 ress.dstBufferSize = ZSTD_DStreamOutSize(); 1457 ress.dstBuffer = malloc(ress.dstBufferSize); 1458 if (!ress.srcBuffer || !ress.dstBuffer) 1459 EXM_THROW(61, "Allocation error : not enough memory"); 1460 1461 /* dictionary */ 1462 { void* dictBuffer; 1463 size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName); 1464 CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); 1465 free(dictBuffer); 1466 } 1467 1468 return ress; 1469 } 1470 1471 static void FIO_freeDResources(dRess_t ress) 1472 { 1473 CHECK( ZSTD_freeDStream(ress.dctx) ); 1474 free(ress.srcBuffer); 1475 free(ress.dstBuffer); 1476 } 1477 1478 1479 /** FIO_fwriteSparse() : 1480 * @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */ 1481 static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips) 1482 { 1483 const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ 1484 size_t bufferSizeT = bufferSize / sizeof(size_t); 1485 const size_t* const bufferTEnd = bufferT + bufferSizeT; 1486 const size_t* ptrT = bufferT; 
/** FIO_fwriteSparse() :
 *  writes bufferSize bytes from buffer into file.
 *  When prefs->sparseFileSupport is enabled, runs of zeroes are not written :
 *  they are accumulated into storedSkips, and turned into a forward seek
 *  right before the next non-zero data gets written.
 *  Zeroes are detected per size_t unit, and the test restarts at every 32 KB segment.
 *  storedSkips : amount of zero bytes skipped by previous calls and not yet applied.
 *  Write and seek failures are reported through EXM_THROW().
 * @return : storedSkips, to be provided to next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd()
 *           (always 0 when sparse mode is disabled) */
static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
{
    const size_t* const bufferT = (const size_t*)buffer;   /* Buffer is supposed malloc'ed, hence aligned on size_t */
    size_t bufferSizeT = bufferSize / sizeof(size_t);      /* nb of complete size_t units remaining to process */
    const size_t* const bufferTEnd = bufferT + bufferSizeT;
    const size_t* ptrT = bufferT;
    static const size_t segmentSizeT = (32 KB) / sizeof(size_t);   /* 0-test re-attempted every 32 KB */

    if (!prefs->sparseFileSupport) {  /* normal write */
        size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
        if (sizeCheck != bufferSize)
            EXM_THROW(70, "Write error : %s (cannot write decoded block)",
                            strerror(errno));
        return 0;
    }

    /* avoid int overflow : apply 1 GB of the pending skip right away */
    if (storedSkips > 1 GB) {
        int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR);
        if (seekResult != 0)
            EXM_THROW(71, "1 GB skip error (sparse file support)");
        storedSkips -= 1 GB;
    }

    /* process complete size_t units, one segment at a time */
    while (ptrT < bufferTEnd) {
        size_t seg0SizeT = segmentSizeT;
        size_t nb0T;

        /* count leading zeros */
        if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;   /* last segment can be shorter */
        bufferSizeT -= seg0SizeT;
        for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
        storedSkips += (unsigned)(nb0T * sizeof(size_t));

        if (nb0T != seg0SizeT) {   /* not all 0s */
            /* apply pending skip, then write the rest of the segment, starting at first non-zero unit */
            int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
            if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse");
            storedSkips = 0;
            seg0SizeT -= nb0T;
            ptrT += nb0T;
            {   size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file);
                if (sizeCheck != seg0SizeT)
                    EXM_THROW(73, "Write error : cannot write decoded block");
        }   }
        ptrT += seg0SizeT;
    }

    /* process trailing bytes (fewer than sizeof(size_t)), one byte at a time */
    {   static size_t const maskT = sizeof(size_t)-1;
        if (bufferSize & maskT) {
            /* size not multiple of sizeof(size_t) : implies end of block */
            const char* const restStart = (const char*)bufferTEnd;
            const char* restPtr = restStart;
            size_t restSize =  bufferSize & maskT;
            const char* const restEnd = restStart + restSize;
            for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
            storedSkips += (unsigned) (restPtr - restStart);
            if (restPtr != restEnd) {
                int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
                if (seekResult)
                    EXM_THROW(74, "Sparse skip error ; try --no-sparse");
                storedSkips = 0;
                {   size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
                    if (sizeCheck != (size_t)(restEnd - restPtr))
                        EXM_THROW(75, "Write error : cannot write decoded end of block");
    }   }   }   }

    return storedSkips;
}
storedSkips, SEEK_CUR); 1539 if (seekResult) 1540 EXM_THROW(74, "Sparse skip error ; try --no-sparse"); 1541 storedSkips = 0; 1542 { size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file); 1543 if (sizeCheck != (size_t)(restEnd - restPtr)) 1544 EXM_THROW(75, "Write error : cannot write decoded end of block"); 1545 } } } } 1546 1547 return storedSkips; 1548 } 1549 1550 static void 1551 FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) 1552 { 1553 if (storedSkips>0) { 1554 assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */ 1555 (void)prefs; /* assert can be disabled, in which case prefs becomes unused */ 1556 if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0) 1557 EXM_THROW(69, "Final skip error (sparse file support)"); 1558 /* last zero must be explicitly written, 1559 * so that skipped ones get implicitly translated as zero by FS */ 1560 { const char lastZeroByte[1] = { 0 }; 1561 if (fwrite(lastZeroByte, 1, 1, file) != 1) 1562 EXM_THROW(69, "Write error : cannot write last zero"); 1563 } } 1564 } 1565 1566 1567 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode 1568 @return : 0 (no error) */ 1569 static int FIO_passThrough(FIO_prefs_t* const prefs, 1570 FILE* foutput, FILE* finput, 1571 void* buffer, size_t bufferSize, 1572 size_t alreadyLoaded) 1573 { 1574 size_t const blockSize = MIN(64 KB, bufferSize); 1575 size_t readFromInput = 1; 1576 unsigned storedSkips = 0; 1577 1578 /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ 1579 { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); 1580 if (sizeCheck != alreadyLoaded) { 1581 DISPLAYLEVEL(1, "Pass-through write error \n"); 1582 return 1; 1583 } } 1584 1585 while (readFromInput) { 1586 readFromInput = fread(buffer, 1, blockSize, finput); 1587 storedSkips = FIO_fwriteSparse(prefs, foutput, buffer, readFromInput, storedSkips); 1588 } 
/* FIO_highbit64() :
 * @return : 0-based position of the highest bit set in v (floor(log2(v))).
 * note : v must be > 0 (enforced by assert()).
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned position;
    assert(v != 0);
    /* each iteration drops the lowest bit; stops once only the top bit is left */
    for (position = 0; (v >> 1) != 0; v >>= 1) {
        position++;
    }
    return position;
}
const prefs, 1643 dRess_t* ress, 1644 FILE* finput, 1645 const char* srcFileName, 1646 U64 alreadyDecoded) 1647 { 1648 U64 frameSize = 0; 1649 U32 storedSkips = 0; 1650 1651 size_t const srcFileLength = strlen(srcFileName); 1652 if (srcFileLength>20) srcFileName += srcFileLength-20; /* display last 20 characters only */ 1653 1654 ZSTD_resetDStream(ress->dctx); 1655 1656 /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ 1657 { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; 1658 if (ress->srcBufferLoaded < toDecode) { 1659 size_t const toRead = toDecode - ress->srcBufferLoaded; 1660 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; 1661 ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); 1662 } } 1663 1664 /* Main decompression Loop */ 1665 while (1) { 1666 ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; 1667 ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 }; 1668 size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); 1669 if (ZSTD_isError(readSizeHint)) { 1670 DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", 1671 srcFileName, ZSTD_getErrorName(readSizeHint)); 1672 FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName); 1673 return FIO_ERROR_FRAME_DECODING; 1674 } 1675 1676 /* Write block */ 1677 storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips); 1678 frameSize += outBuff.pos; 1679 DISPLAYUPDATE(2, "\r%-20.20s : %u MB... 
", 1680 srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) ); 1681 1682 if (inBuff.pos > 0) { 1683 memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); 1684 ress->srcBufferLoaded -= inBuff.pos; 1685 } 1686 1687 if (readSizeHint == 0) break; /* end of frame */ 1688 if (inBuff.size != inBuff.pos) { 1689 DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n", 1690 srcFileName); 1691 return FIO_ERROR_FRAME_DECODING; 1692 } 1693 1694 /* Fill input buffer */ 1695 { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ 1696 if (ress->srcBufferLoaded < toDecode) { 1697 size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ 1698 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; 1699 size_t const readSize = fread(startPosition, 1, toRead, finput); 1700 if (readSize==0) { 1701 DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", 1702 srcFileName); 1703 return FIO_ERROR_FRAME_DECODING; 1704 } 1705 ress->srcBufferLoaded += readSize; 1706 } } } 1707 1708 FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); 1709 1710 return frameSize; 1711 } 1712 1713 1714 #ifdef ZSTD_GZDECOMPRESS 1715 static unsigned long long FIO_decompressGzFrame(dRess_t* ress, 1716 FILE* srcFile, const char* srcFileName) 1717 { 1718 unsigned long long outFileSize = 0; 1719 z_stream strm; 1720 int flush = Z_NO_FLUSH; 1721 int decodingError = 0; 1722 1723 strm.zalloc = Z_NULL; 1724 strm.zfree = Z_NULL; 1725 strm.opaque = Z_NULL; 1726 strm.next_in = 0; 1727 strm.avail_in = 0; 1728 /* see http://www.zlib.net/manual.html */ 1729 if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) 1730 return FIO_ERROR_FRAME_DECODING; 1731 1732 strm.next_out = (Bytef*)ress->dstBuffer; 1733 strm.avail_out = (uInt)ress->dstBufferSize; 1734 strm.avail_in = (uInt)ress->srcBufferLoaded; 1735 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 1736 1737 
for ( ; ; ) { 1738 int ret; 1739 if (strm.avail_in == 0) { 1740 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 1741 if (ress->srcBufferLoaded == 0) flush = Z_FINISH; 1742 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 1743 strm.avail_in = (uInt)ress->srcBufferLoaded; 1744 } 1745 ret = inflate(&strm, flush); 1746 if (ret == Z_BUF_ERROR) { 1747 DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); 1748 decodingError = 1; break; 1749 } 1750 if (ret != Z_OK && ret != Z_STREAM_END) { 1751 DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); 1752 decodingError = 1; break; 1753 } 1754 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 1755 if (decompBytes) { 1756 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { 1757 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1758 decodingError = 1; break; 1759 } 1760 outFileSize += decompBytes; 1761 strm.next_out = (Bytef*)ress->dstBuffer; 1762 strm.avail_out = (uInt)ress->dstBufferSize; 1763 } 1764 } 1765 if (ret == Z_STREAM_END) break; 1766 } 1767 1768 if (strm.avail_in > 0) 1769 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 1770 ress->srcBufferLoaded = strm.avail_in; 1771 if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ 1772 && (decodingError==0) ) { 1773 DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); 1774 decodingError = 1; 1775 } 1776 return decodingError ? 
FIO_ERROR_FRAME_DECODING : outFileSize; 1777 } 1778 #endif 1779 1780 1781 #ifdef ZSTD_LZMADECOMPRESS 1782 static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma) 1783 { 1784 unsigned long long outFileSize = 0; 1785 lzma_stream strm = LZMA_STREAM_INIT; 1786 lzma_action action = LZMA_RUN; 1787 lzma_ret initRet; 1788 int decodingError = 0; 1789 1790 strm.next_in = 0; 1791 strm.avail_in = 0; 1792 if (plain_lzma) { 1793 initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ 1794 } else { 1795 initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ 1796 } 1797 1798 if (initRet != LZMA_OK) { 1799 DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", 1800 plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder", 1801 srcFileName, initRet); 1802 return FIO_ERROR_FRAME_DECODING; 1803 } 1804 1805 strm.next_out = (BYTE*)ress->dstBuffer; 1806 strm.avail_out = ress->dstBufferSize; 1807 strm.next_in = (BYTE const*)ress->srcBuffer; 1808 strm.avail_in = ress->srcBufferLoaded; 1809 1810 for ( ; ; ) { 1811 lzma_ret ret; 1812 if (strm.avail_in == 0) { 1813 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 1814 if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; 1815 strm.next_in = (BYTE const*)ress->srcBuffer; 1816 strm.avail_in = ress->srcBufferLoaded; 1817 } 1818 ret = lzma_code(&strm, action); 1819 1820 if (ret == LZMA_BUF_ERROR) { 1821 DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); 1822 decodingError = 1; break; 1823 } 1824 if (ret != LZMA_OK && ret != LZMA_STREAM_END) { 1825 DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", 1826 srcFileName, ret); 1827 decodingError = 1; break; 1828 } 1829 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 1830 if (decompBytes) { 1831 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { 1832 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1833 decodingError = 1; break; 1834 } 1835 
outFileSize += decompBytes; 1836 strm.next_out = (BYTE*)ress->dstBuffer; 1837 strm.avail_out = ress->dstBufferSize; 1838 } } 1839 if (ret == LZMA_STREAM_END) break; 1840 } 1841 1842 if (strm.avail_in > 0) 1843 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 1844 ress->srcBufferLoaded = strm.avail_in; 1845 lzma_end(&strm); 1846 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; 1847 } 1848 #endif 1849 1850 #ifdef ZSTD_LZ4DECOMPRESS 1851 static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, 1852 FILE* srcFile, const char* srcFileName) 1853 { 1854 unsigned long long filesize = 0; 1855 LZ4F_errorCode_t nextToLoad; 1856 LZ4F_decompressionContext_t dCtx; 1857 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); 1858 int decodingError = 0; 1859 1860 if (LZ4F_isError(errorCode)) { 1861 DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); 1862 return FIO_ERROR_FRAME_DECODING; 1863 } 1864 1865 /* Init feed with magic number (already consumed from FILE* sFile) */ 1866 { size_t inSize = 4; 1867 size_t outSize= 0; 1868 MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); 1869 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); 1870 if (LZ4F_isError(nextToLoad)) { 1871 DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", 1872 srcFileName, LZ4F_getErrorName(nextToLoad)); 1873 LZ4F_freeDecompressionContext(dCtx); 1874 return FIO_ERROR_FRAME_DECODING; 1875 } } 1876 1877 /* Main Loop */ 1878 for (;nextToLoad;) { 1879 size_t readSize; 1880 size_t pos = 0; 1881 size_t decodedBytes = ress->dstBufferSize; 1882 1883 /* Read input */ 1884 if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; 1885 readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); 1886 if (!readSize) break; /* reached end of file or stream */ 1887 1888 while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ 1889 /* Decode 
Input (at least partially) */ 1890 size_t remaining = readSize - pos; 1891 decodedBytes = ress->dstBufferSize; 1892 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); 1893 if (LZ4F_isError(nextToLoad)) { 1894 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", 1895 srcFileName, LZ4F_getErrorName(nextToLoad)); 1896 decodingError = 1; nextToLoad = 0; break; 1897 } 1898 pos += remaining; 1899 1900 /* Write Block */ 1901 if (decodedBytes) { 1902 if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) { 1903 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1904 decodingError = 1; nextToLoad = 0; break; 1905 } 1906 filesize += decodedBytes; 1907 DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20)); 1908 } 1909 1910 if (!nextToLoad) break; 1911 } 1912 } 1913 /* can be out because readSize == 0, which could be an fread() error */ 1914 if (ferror(srcFile)) { 1915 DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); 1916 decodingError=1; 1917 } 1918 1919 if (nextToLoad!=0) { 1920 DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); 1921 decodingError=1; 1922 } 1923 1924 LZ4F_freeDecompressionContext(dCtx); 1925 ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ 1926 1927 return decodingError ? 
FIO_ERROR_FRAME_DECODING : filesize; 1928 } 1929 #endif 1930 1931 1932 1933 /** FIO_decompressFrames() : 1934 * Find and decode frames inside srcFile 1935 * srcFile presumed opened and valid 1936 * @return : 0 : OK 1937 * 1 : error 1938 */ 1939 static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* srcFile, 1940 const char* dstFileName, const char* srcFileName) 1941 { 1942 unsigned readSomething = 0; 1943 unsigned long long filesize = 0; 1944 assert(srcFile != NULL); 1945 1946 /* for each frame */ 1947 for ( ; ; ) { 1948 /* check magic number -> version */ 1949 size_t const toRead = 4; 1950 const BYTE* const buf = (const BYTE*)ress.srcBuffer; 1951 if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ 1952 ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, 1953 (size_t)1, toRead - ress.srcBufferLoaded, srcFile); 1954 if (ress.srcBufferLoaded==0) { 1955 if (readSomething==0) { /* srcFile is empty (which is invalid) */ 1956 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); 1957 return 1; 1958 } /* else, just reached frame boundary */ 1959 break; /* no more input */ 1960 } 1961 readSomething = 1; /* there is at least 1 byte in srcFile */ 1962 if (ress.srcBufferLoaded < toRead) { 1963 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); 1964 return 1; 1965 } 1966 if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { 1967 unsigned long long const frameSize = FIO_decompressZstdFrame(prefs, &ress, srcFile, srcFileName, filesize); 1968 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1969 filesize += frameSize; 1970 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ 1971 #ifdef ZSTD_GZDECOMPRESS 1972 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName); 1973 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1974 filesize += frameSize; 1975 #else 1976 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without 
HAVE_ZLIB) -- ignored \n", srcFileName); 1977 return 1; 1978 #endif 1979 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ 1980 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ 1981 #ifdef ZSTD_LZMADECOMPRESS 1982 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD); 1983 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1984 filesize += frameSize; 1985 #else 1986 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); 1987 return 1; 1988 #endif 1989 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { 1990 #ifdef ZSTD_LZ4DECOMPRESS 1991 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName); 1992 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1993 filesize += frameSize; 1994 #else 1995 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); 1996 return 1; 1997 #endif 1998 } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ 1999 return FIO_passThrough(prefs, 2000 ress.dstFile, srcFile, 2001 ress.srcBuffer, ress.srcBufferSize, 2002 ress.srcBufferLoaded); 2003 } else { 2004 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); 2005 return 1; 2006 } } /* for each frame */ 2007 2008 /* Final Status */ 2009 DISPLAYLEVEL(2, "\r%79s\r", ""); 2010 DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); 2011 2012 return 0; 2013 } 2014 2015 /** FIO_decompressDstFile() : 2016 open `dstFileName`, 2017 or path-through if ress.dstFile is already != 0, 2018 then start decompression process (FIO_decompressFrames()). 
2019 @return : 0 : OK 2020 1 : operation aborted 2021 */ 2022 static int FIO_decompressDstFile(FIO_prefs_t* const prefs, 2023 dRess_t ress, FILE* srcFile, 2024 const char* dstFileName, const char* srcFileName) 2025 { 2026 int result; 2027 stat_t statbuf; 2028 int transfer_permissions = 0; 2029 int releaseDstFile = 0; 2030 2031 if (ress.dstFile == NULL) { 2032 releaseDstFile = 1; 2033 2034 ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName); 2035 if (ress.dstFile==0) return 1; 2036 2037 /* Must only be added after FIO_openDstFile() succeeds. 2038 * Otherwise we may delete the destination file if it already exists, 2039 * and the user presses Ctrl-C when asked if they wish to overwrite. 2040 */ 2041 addHandler(dstFileName); 2042 2043 if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ 2044 && UTIL_getFileStat(srcFileName, &statbuf) ) 2045 transfer_permissions = 1; 2046 } 2047 2048 2049 result = FIO_decompressFrames(prefs, ress, srcFile, dstFileName, srcFileName); 2050 2051 if (releaseDstFile) { 2052 FILE* const dstFile = ress.dstFile; 2053 clearHandler(); 2054 ress.dstFile = NULL; 2055 if (fclose(dstFile)) { 2056 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 2057 result = 1; 2058 } 2059 2060 if ( (result != 0) /* operation failure */ 2061 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */ 2062 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ 2063 ) { 2064 FIO_remove(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ 2065 } else { /* operation success */ 2066 if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */ 2067 && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */ 2068 && transfer_permissions ) /* file permissions correctly extracted from src */ 2069 UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into 
dst */ 2070 } 2071 } 2072 2073 return result; 2074 } 2075 2076 2077 /** FIO_decompressSrcFile() : 2078 Open `srcFileName`, transfer control to decompressDstFile() 2079 @return : 0 : OK 2080 1 : error 2081 */ 2082 static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) 2083 { 2084 FILE* srcFile; 2085 int result; 2086 2087 if (UTIL_isDirectory(srcFileName)) { 2088 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 2089 return 1; 2090 } 2091 2092 srcFile = FIO_openSrcFile(srcFileName); 2093 if (srcFile==NULL) return 1; 2094 ress.srcBufferLoaded = 0; 2095 2096 result = FIO_decompressDstFile(prefs, ress, srcFile, dstFileName, srcFileName); 2097 2098 /* Close file */ 2099 if (fclose(srcFile)) { 2100 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ 2101 return 1; 2102 } 2103 if ( prefs->removeSrcFile /* --rm */ 2104 && (result==0) /* decompression successful */ 2105 && strcmp(srcFileName, stdinmark) ) /* not stdin */ { 2106 /* We must clear the handler, since after this point calling it would 2107 * delete both the source and destination files. 2108 */ 2109 clearHandler(); 2110 if (FIO_remove(srcFileName)) { 2111 /* failed to remove src file */ 2112 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 2113 return 1; 2114 } } 2115 return result; 2116 } 2117 2118 2119 2120 int FIO_decompressFilename(FIO_prefs_t* const prefs, 2121 const char* dstFileName, const char* srcFileName, 2122 const char* dictFileName) 2123 { 2124 dRess_t const ress = FIO_createDResources(prefs, dictFileName); 2125 2126 int const decodingError = FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); 2127 2128 FIO_freeDResources(ress); 2129 return decodingError; 2130 } 2131 2132 2133 /* FIO_determineDstName() : 2134 * create a destination filename from a srcFileName. 2135 * @return a pointer to it. 
2136 * @return == NULL if there is an error */ 2137 static const char* 2138 FIO_determineDstName(const char* srcFileName) 2139 { 2140 static size_t dfnbCapacity = 0; 2141 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ 2142 2143 size_t const sfnSize = strlen(srcFileName); 2144 size_t suffixSize; 2145 const char* const suffixPtr = strrchr(srcFileName, '.'); 2146 if (suffixPtr == NULL) { 2147 DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", 2148 srcFileName); 2149 return NULL; 2150 } 2151 suffixSize = strlen(suffixPtr); 2152 2153 /* check suffix is authorized */ 2154 if (sfnSize <= suffixSize 2155 || ( strcmp(suffixPtr, ZSTD_EXTENSION) 2156 #ifdef ZSTD_GZDECOMPRESS 2157 && strcmp(suffixPtr, GZ_EXTENSION) 2158 #endif 2159 #ifdef ZSTD_LZMADECOMPRESS 2160 && strcmp(suffixPtr, XZ_EXTENSION) 2161 && strcmp(suffixPtr, LZMA_EXTENSION) 2162 #endif 2163 #ifdef ZSTD_LZ4DECOMPRESS 2164 && strcmp(suffixPtr, LZ4_EXTENSION) 2165 #endif 2166 ) ) { 2167 const char* suffixlist = ZSTD_EXTENSION 2168 #ifdef ZSTD_GZDECOMPRESS 2169 "/" GZ_EXTENSION 2170 #endif 2171 #ifdef ZSTD_LZMADECOMPRESS 2172 "/" XZ_EXTENSION "/" LZMA_EXTENSION 2173 #endif 2174 #ifdef ZSTD_LZ4DECOMPRESS 2175 "/" LZ4_EXTENSION 2176 #endif 2177 ; 2178 DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n", 2179 srcFileName, suffixlist); 2180 return NULL; 2181 } 2182 2183 /* allocate enough space to write dstFilename into it */ 2184 if (dfnbCapacity+suffixSize <= sfnSize+1) { 2185 free(dstFileNameBuffer); 2186 dfnbCapacity = sfnSize + 20; 2187 dstFileNameBuffer = (char*)malloc(dfnbCapacity); 2188 if (dstFileNameBuffer==NULL) 2189 EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno)); 2190 } 2191 2192 /* return dst name == src name truncated from suffix */ 2193 assert(dstFileNameBuffer != NULL); 2194 memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize); 2195 dstFileNameBuffer[sfnSize-suffixSize] = '\0'; 2196 
return dstFileNameBuffer; 2197 2198 /* note : dstFileNameBuffer memory is not going to be free */ 2199 } 2200 2201 2202 int 2203 FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs, 2204 const char* srcNamesTable[], unsigned nbFiles, 2205 const char* outFileName, 2206 const char* dictFileName) 2207 { 2208 int error = 0; 2209 dRess_t ress = FIO_createDResources(prefs, dictFileName); 2210 2211 if (outFileName) { 2212 unsigned u; 2213 ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName); 2214 if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", outFileName); 2215 for (u=0; u<nbFiles; u++) 2216 error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[u]); 2217 if (fclose(ress.dstFile)) 2218 EXM_THROW(72, "Write error : %s : cannot properly close output file", 2219 strerror(errno)); 2220 } else { 2221 unsigned u; 2222 for (u=0; u<nbFiles; u++) { /* create dstFileName */ 2223 const char* const srcFileName = srcNamesTable[u]; 2224 const char* const dstFileName = FIO_determineDstName(srcFileName); 2225 if (dstFileName == NULL) { error=1; continue; } 2226 2227 error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName); 2228 } 2229 } 2230 2231 FIO_freeDResources(ress); 2232 return error; 2233 } 2234 2235 2236 2237 /* ************************************************************************** 2238 * .zst file info (--list command) 2239 ***************************************************************************/ 2240 2241 typedef struct { 2242 U64 decompressedSize; 2243 U64 compressedSize; 2244 U64 windowSize; 2245 int numActualFrames; 2246 int numSkippableFrames; 2247 int decompUnavailable; 2248 int usesCheck; 2249 U32 nbFiles; 2250 } fileInfo_t; 2251 2252 typedef enum { 2253 info_success=0, 2254 info_frame_error=1, 2255 info_not_zstd=2, 2256 info_file_error=3, 2257 info_truncated_input=4, 2258 } InfoError; 2259 2260 #define ERROR_IF(c,n,...) 
{ \ 2261 if (c) { \ 2262 DISPLAYLEVEL(1, __VA_ARGS__); \ 2263 DISPLAYLEVEL(1, " \n"); \ 2264 return n; \ 2265 } \ 2266 } 2267 2268 static InfoError 2269 FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) 2270 { 2271 /* begin analyzing frame */ 2272 for ( ; ; ) { 2273 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 2274 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); 2275 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN) { 2276 if ( feof(srcFile) 2277 && (numBytesRead == 0) 2278 && (info->compressedSize > 0) 2279 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { 2280 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile); 2281 unsigned long long file_size = (unsigned long long) info->compressedSize; 2282 ERROR_IF(file_position != file_size, info_truncated_input, 2283 "Error: seeked to position %llu, which is beyond file size of %llu\n", 2284 file_position, 2285 file_size); 2286 break; /* correct end of file => success */ 2287 } 2288 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame"); 2289 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames"); 2290 } 2291 { U32 const magicNumber = MEM_readLE32(headerBuffer); 2292 /* Zstandard frame */ 2293 if (magicNumber == ZSTD_MAGICNUMBER) { 2294 ZSTD_frameHeader header; 2295 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); 2296 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR 2297 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) { 2298 info->decompUnavailable = 1; 2299 } else { 2300 info->decompressedSize += frameContentSize; 2301 } 2302 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, 2303 info_frame_error, "Error: could not decode frame header"); 2304 info->windowSize = header.windowSize; 2305 /* move to the end of the frame header */ 2306 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); 2307 
ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size"); 2308 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0, 2309 info_frame_error, "Error: could not move to end of frame header"); 2310 } 2311 2312 /* skip all blocks in the frame */ 2313 { int lastBlock = 0; 2314 do { 2315 BYTE blockHeaderBuffer[3]; 2316 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3, 2317 info_frame_error, "Error while reading block header"); 2318 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); 2319 U32 const blockTypeID = (blockHeader >> 1) & 3; 2320 U32 const isRLE = (blockTypeID == 1); 2321 U32 const isWrongBlock = (blockTypeID == 3); 2322 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); 2323 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type"); 2324 lastBlock = blockHeader & 1; 2325 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0, 2326 info_frame_error, "Error: could not skip to end of block"); 2327 } 2328 } while (lastBlock != 1); 2329 } 2330 2331 /* check if checksum is used */ 2332 { BYTE const frameHeaderDescriptor = headerBuffer[4]; 2333 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; 2334 if (contentChecksumFlag) { 2335 info->usesCheck = 1; 2336 ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0, 2337 info_frame_error, "Error: could not skip past checksum"); 2338 } } 2339 info->numActualFrames++; 2340 } 2341 /* Skippable frame */ 2342 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 2343 U32 const frameSize = MEM_readLE32(headerBuffer + 4); 2344 long const seek = (long)(8 + frameSize - numBytesRead); 2345 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0, 2346 info_frame_error, "Error: could not find end of skippable frame"); 2347 info->numSkippableFrames++; 2348 } 2349 /* unknown content */ 2350 else { 2351 return info_not_zstd; 2352 } 2353 } /* magic number analysis */ 2354 } /* end analyzing frames */ 2355 
return info_success; 2356 } 2357 2358 2359 static InfoError 2360 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) 2361 { 2362 InfoError status; 2363 FILE* const srcFile = FIO_openSrcFile(inFileName); 2364 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName); 2365 2366 info->compressedSize = UTIL_getFileSize(inFileName); 2367 status = FIO_analyzeFrames(info, srcFile); 2368 2369 fclose(srcFile); 2370 info->nbFiles = 1; 2371 return status; 2372 } 2373 2374 2375 /** getFileInfo() : 2376 * Reads information from file, stores in *info 2377 * @return : InfoError status 2378 */ 2379 static InfoError 2380 getFileInfo(fileInfo_t* info, const char* srcFileName) 2381 { 2382 ERROR_IF(!UTIL_isRegularFile(srcFileName), 2383 info_file_error, "Error : %s is not a file", srcFileName); 2384 return getFileInfo_fileConfirmed(info, srcFileName); 2385 } 2386 2387 2388 static void 2389 displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) 2390 { 2391 unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB); 2392 const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB"; 2393 double const windowSizeUnit = (double)info->windowSize / unit; 2394 double const compressedSizeUnit = (double)info->compressedSize / unit; 2395 double const decompressedSizeUnit = (double)info->decompressedSize / unit; 2396 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize; 2397 const char* const checkString = (info->usesCheck ? 
"XXH64" : "None"); 2398 if (displayLevel <= 2) { 2399 if (!info->decompUnavailable) { 2400 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n", 2401 info->numSkippableFrames + info->numActualFrames, 2402 info->numSkippableFrames, 2403 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 2404 ratio, checkString, inFileName); 2405 } else { 2406 DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n", 2407 info->numSkippableFrames + info->numActualFrames, 2408 info->numSkippableFrames, 2409 compressedSizeUnit, unitStr, 2410 checkString, inFileName); 2411 } 2412 } else { 2413 DISPLAYOUT("%s \n", inFileName); 2414 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); 2415 if (info->numSkippableFrames) 2416 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); 2417 DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n", 2418 windowSizeUnit, unitStr, 2419 (unsigned long long)info->windowSize); 2420 DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n", 2421 compressedSizeUnit, unitStr, 2422 (unsigned long long)info->compressedSize); 2423 if (!info->decompUnavailable) { 2424 DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n", 2425 decompressedSizeUnit, unitStr, 2426 (unsigned long long)info->decompressedSize); 2427 DISPLAYOUT("Ratio: %.4f\n", ratio); 2428 } 2429 DISPLAYOUT("Check: %s\n", checkString); 2430 DISPLAYOUT("\n"); 2431 } 2432 } 2433 2434 static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) 2435 { 2436 fileInfo_t total; 2437 memset(&total, 0, sizeof(total)); 2438 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; 2439 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; 2440 total.compressedSize = fi1.compressedSize + fi2.compressedSize; 2441 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; 2442 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; 2443 total.usesCheck = fi1.usesCheck & fi2.usesCheck; 2444 total.nbFiles = fi1.nbFiles + fi2.nbFiles; 2445 return total; 2446 
} 2447 2448 static int 2449 FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel) 2450 { 2451 fileInfo_t info; 2452 memset(&info, 0, sizeof(info)); 2453 { InfoError const error = getFileInfo(&info, inFileName); 2454 switch (error) { 2455 case info_frame_error: 2456 /* display error, but provide output */ 2457 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName); 2458 break; 2459 case info_not_zstd: 2460 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName); 2461 if (displayLevel > 2) DISPLAYOUT("\n"); 2462 return 1; 2463 case info_file_error: 2464 /* error occurred while opening the file */ 2465 if (displayLevel > 2) DISPLAYOUT("\n"); 2466 return 1; 2467 case info_truncated_input: 2468 DISPLAYOUT("File \"%s\" is truncated \n", inFileName); 2469 if (displayLevel > 2) DISPLAYOUT("\n"); 2470 return 1; 2471 case info_success: 2472 default: 2473 break; 2474 } 2475 2476 displayInfo(inFileName, &info, displayLevel); 2477 *total = FIO_addFInfo(*total, info); 2478 assert(error == info_success || error == info_frame_error); 2479 return error; 2480 } 2481 } 2482 2483 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel) 2484 { 2485 /* ensure no specified input is stdin (needs fseek() capability) */ 2486 { unsigned u; 2487 for (u=0; u<numFiles;u++) { 2488 ERROR_IF(!strcmp (filenameTable[u], stdinmark), 2489 1, "zstd: --list does not support reading from standard input"); 2490 } } 2491 2492 if (numFiles == 0) { 2493 if (!IS_CONSOLE(stdin)) { 2494 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); 2495 } 2496 DISPLAYLEVEL(1, "No files given \n"); 2497 return 1; 2498 } 2499 2500 if (displayLevel <= 2) { 2501 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); 2502 } 2503 { int error = 0; 2504 fileInfo_t total; 2505 memset(&total, 0, sizeof(total)); 2506 total.usesCheck = 1; 2507 /* --list each file, and check for any error */ 2508 { unsigned u; 2509 for 
(u=0; u<numFiles;u++) { 2510 error |= FIO_listFile(&total, filenameTable[u], displayLevel); 2511 } } 2512 if (numFiles > 1 && displayLevel <= 2) { /* display total */ 2513 unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB); 2514 const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB"; 2515 double const compressedSizeUnit = (double)total.compressedSize / unit; 2516 double const decompressedSizeUnit = (double)total.decompressedSize / unit; 2517 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize; 2518 const char* const checkString = (total.usesCheck ? "XXH64" : ""); 2519 DISPLAYOUT("----------------------------------------------------------------- \n"); 2520 if (total.decompUnavailable) { 2521 DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n", 2522 total.numSkippableFrames + total.numActualFrames, 2523 total.numSkippableFrames, 2524 compressedSizeUnit, unitStr, 2525 checkString, (unsigned)total.nbFiles); 2526 } else { 2527 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n", 2528 total.numSkippableFrames + total.numActualFrames, 2529 total.numSkippableFrames, 2530 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 2531 ratio, checkString, (unsigned)total.nbFiles); 2532 } } 2533 return error; 2534 } 2535 } 2536 2537 2538 #endif /* #ifndef ZSTD_NODECOMPRESS */ 2539