1 /* 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 /* ************************************* 13 * Compiler Options 14 ***************************************/ 15 #ifdef _MSC_VER /* Visual */ 16 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 17 # pragma warning(disable : 4204) /* non-constant aggregate initializer */ 18 #endif 19 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE) 20 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ 21 #endif 22 23 24 /*-************************************* 25 * Includes 26 ***************************************/ 27 #include "platform.h" /* Large Files support, SET_BINARY_MODE */ 28 #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile */ 29 #include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ 30 #include <stdlib.h> /* malloc, free */ 31 #include <string.h> /* strcmp, strlen */ 32 #include <errno.h> /* errno */ 33 34 #if defined (_MSC_VER) 35 # include <sys/stat.h> 36 # include <io.h> 37 #endif 38 39 #include "mem.h" 40 #include "fileio.h" 41 #include "util.h" 42 43 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ 44 #include "zstd.h" 45 #include "zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ 46 47 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) 48 # include <zlib.h> 49 # if !defined(z_const) 50 # define z_const 51 # endif 52 #endif 53 54 #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) 55 # include <lzma.h> 56 #endif 57 58 #define LZ4_MAGICNUMBER 0x184D2204 59 #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) 60 # define LZ4F_ENABLE_OBSOLETE_ENUMS 61 # include <lz4frame.h> 62 # include <lz4.h> 63 #endif 64 65 66 /*-************************************* 67 * Constants 68 ***************************************/ 69 #define KB *(1<<10) 70 #define MB *(1<<20) 71 #define GB *(1U<<30) 72 73 #define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ 74 75 #define FNSPACE 30 76 77 78 /*-************************************* 79 * Macros 80 ***************************************/ 81 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 82 #define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) 83 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } 84 static int g_displayLevel = 2; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ 85 void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; } 86 87 static const U64 g_refreshRate = SEC_TO_MICRO / 6; 88 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 89 90 #define READY_FOR_UPDATE() (UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) 91 #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } 92 #define DISPLAYUPDATE(l, ...) { \ 93 if (g_displayLevel>=l) { \ 94 if (READY_FOR_UPDATE() || (g_displayLevel>=4)) { \ 95 DELAY_NEXT_UPDATE(); \ 96 DISPLAY(__VA_ARGS__); \ 97 if (g_displayLevel>=4) fflush(stderr); \ 98 } } } 99 100 #undef MIN /* in case it would be already defined */ 101 #define MIN(a,b) ((a) < (b) ? (a) : (b)) 102 103 104 /*-************************************* 105 * Debug 106 ***************************************/ 107 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1) 108 # include <assert.h> 109 #else 110 # ifndef assert 111 # define assert(condition) ((void)0) 112 # endif 113 #endif 114 115 #ifndef ZSTD_DEBUG 116 # define ZSTD_DEBUG 0 117 #endif 118 #define DEBUGLOG(l,...) if (l<=ZSTD_DEBUG) DISPLAY(__VA_ARGS__); 119 #define EXM_THROW(error, ...) \ 120 { \ 121 DISPLAYLEVEL(1, "zstd: "); \ 122 DEBUGLOG(1, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ 123 DISPLAYLEVEL(1, "error %i : ", error); \ 124 DISPLAYLEVEL(1, __VA_ARGS__); \ 125 DISPLAYLEVEL(1, " \n"); \ 126 exit(error); \ 127 } 128 129 #define CHECK_V(v, f) \ 130 v = f; \ 131 if (ZSTD_isError(v)) { \ 132 DEBUGLOG(1, "%s \n", #f); \ 133 EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ 134 } 135 #define CHECK(f) { size_t err; CHECK_V(err, f); } 136 137 138 /*-************************************ 139 * Signal (Ctrl-C trapping) 140 **************************************/ 141 #include <signal.h> 142 143 static const char* g_artefact = NULL; 144 static void INThandler(int sig) 145 { 146 assert(sig==SIGINT); (void)sig; 147 #if !defined(_MSC_VER) 148 signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ 149 #endif 150 if (g_artefact) { 151 assert(UTIL_isRegularFile(g_artefact)); 152 remove(g_artefact); 153 } 154 DISPLAY("\n"); 155 exit(2); 156 } 157 static void addHandler(char const* dstFileName) 158 { 159 if (UTIL_isRegularFile(dstFileName)) { 160 g_artefact = dstFileName; 161 signal(SIGINT, INThandler); 162 } else { 163 g_artefact = NULL; 164 } 165 } 166 /* Idempotent */ 167 static void clearHandler(void) 168 { 169 if (g_artefact) signal(SIGINT, SIG_DFL); 170 g_artefact = NULL; 171 } 172 173 174 /* ************************************************************ 175 * Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW 176 ***************************************************************/ 177 #if defined(_MSC_VER) && _MSC_VER >= 1400 178 # define LONG_SEEK _fseeki64 179 #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ 180 # define LONG_SEEK fseeko 181 #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) 182 # define LONG_SEEK fseeko64 183 #elif defined(_WIN32) && !defined(__DJGPP__) 184 # include <windows.h> 185 static int LONG_SEEK(FILE* file, __int64 offset, int origin) { 186 LARGE_INTEGER off; 187 DWORD method; 188 off.QuadPart = offset; 189 if (origin == SEEK_END) 190 method = FILE_END; 191 else if (origin == SEEK_CUR) 192 method = FILE_CURRENT; 193 else 194 method = FILE_BEGIN; 195 196 if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) 197 return 0; 198 else 199 return -1; 200 } 201 #else 202 # define LONG_SEEK fseek 203 #endif 204 205 206 /*-************************************* 207 * Local Parameters - Not thread safe 208 ***************************************/ 209 static FIO_compressionType_t g_compressionType = FIO_zstdCompression; 210 void FIO_setCompressionType(FIO_compressionType_t compressionType) { g_compressionType = compressionType; } 211 static U32 g_overwrite = 0; 212 void FIO_overwriteMode(void) { g_overwrite=1; } 213 static U32 g_sparseFileSupport = 1; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ 214 void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; } 215 static U32 g_dictIDFlag = 1; 216 void FIO_setDictIDFlag(unsigned dictIDFlag) { g_dictIDFlag = dictIDFlag; } 217 static U32 g_checksumFlag = 1; 218 void FIO_setChecksumFlag(unsigned checksumFlag) { g_checksumFlag = checksumFlag; } 219 static U32 g_removeSrcFile = 0; 220 void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); } 221 static U32 g_memLimit = 0; 222 void FIO_setMemLimit(unsigned memLimit) { g_memLimit = memLimit; } 223 static U32 g_nbWorkers = 1; 224 void FIO_setNbWorkers(unsigned nbWorkers) { 225 #ifndef ZSTD_MULTITHREAD 226 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); 227 #endif 228 g_nbWorkers = nbWorkers; 229 } 230 static U32 g_blockSize = 0; 231 void FIO_setBlockSize(unsigned blockSize) { 232 if (blockSize && g_nbWorkers==0) 233 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); 234 g_blockSize = blockSize; 235 } 236 #define FIO_OVERLAP_LOG_NOTSET 9999 237 static U32 g_overlapLog = FIO_OVERLAP_LOG_NOTSET; 238 void FIO_setOverlapLog(unsigned overlapLog){ 239 if (overlapLog && g_nbWorkers==0) 240 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); 241 g_overlapLog = overlapLog; 242 } 243 static U32 g_ldmFlag = 0; 244 void FIO_setLdmFlag(unsigned ldmFlag) { 245 g_ldmFlag = (ldmFlag>0); 246 } 247 static U32 g_ldmHashLog = 0; 248 void FIO_setLdmHashLog(unsigned ldmHashLog) { 249 g_ldmHashLog = ldmHashLog; 250 } 251 static U32 g_ldmMinMatch = 0; 252 void FIO_setLdmMinMatch(unsigned ldmMinMatch) { 253 g_ldmMinMatch = ldmMinMatch; 254 } 255 256 #define FIO_LDM_PARAM_NOTSET 9999 257 static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; 258 void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { 259 g_ldmBucketSizeLog = ldmBucketSizeLog; 260 } 261 262 static U32 g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET; 263 void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) { 264 g_ldmHashEveryLog = ldmHashEveryLog; 265 } 266 267 268 269 /*-************************************* 270 * Functions 271 ***************************************/ 272 /** FIO_remove() : 273 * @result : Unlink `fileName`, even if it's read-only */ 274 static int FIO_remove(const char* path) 275 { 276 if (!UTIL_isRegularFile(path)) { 277 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); 278 return 0; 279 } 280 #if defined(_WIN32) || defined(WIN32) 281 /* windows doesn't allow remove read-only files, 282 * so try to make it writable first */ 283 chmod(path, _S_IWRITE); 284 #endif 285 return remove(path); 286 } 287 288 /** FIO_openSrcFile() : 289 * condition : `srcFileName` must be non-NULL. 290 * @result : FILE* to `srcFileName`, or NULL if it fails */ 291 static FILE* FIO_openSrcFile(const char* srcFileName) 292 { 293 assert(srcFileName != NULL); 294 if (!strcmp (srcFileName, stdinmark)) { 295 DISPLAYLEVEL(4,"Using stdin for input\n"); 296 SET_BINARY_MODE(stdin); 297 return stdin; 298 } 299 300 if (!UTIL_isRegularFile(srcFileName)) { 301 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", 302 srcFileName); 303 return NULL; 304 } 305 306 { FILE* const f = fopen(srcFileName, "rb"); 307 if (f == NULL) 308 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 309 return f; 310 } 311 } 312 313 /** FIO_openDstFile() : 314 * condition : `dstFileName` must be non-NULL. 315 * @result : FILE* to `dstFileName`, or NULL if it fails */ 316 static FILE* FIO_openDstFile(const char* dstFileName) 317 { 318 assert(dstFileName != NULL); 319 if (!strcmp (dstFileName, stdoutmark)) { 320 DISPLAYLEVEL(4,"Using stdout for output\n"); 321 SET_BINARY_MODE(stdout); 322 if (g_sparseFileSupport==1) { 323 g_sparseFileSupport = 0; 324 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); 325 } 326 return stdout; 327 } 328 329 if (g_sparseFileSupport == 1) { 330 g_sparseFileSupport = ZSTD_SPARSE_DEFAULT; 331 } 332 333 if (UTIL_isRegularFile(dstFileName)) { 334 FILE* fCheck; 335 if (!strcmp(dstFileName, nulmark)) { 336 EXM_THROW(40, "%s is unexpectedly a regular file", dstFileName); 337 } 338 /* Check if destination file already exists */ 339 fCheck = fopen( dstFileName, "rb" ); 340 if (fCheck != NULL) { /* dst file exists, authorization prompt */ 341 fclose(fCheck); 342 if (!g_overwrite) { 343 if (g_displayLevel <= 1) { 344 /* No interaction possible */ 345 DISPLAY("zstd: %s already exists; not overwritten \n", 346 dstFileName); 347 return NULL; 348 } 349 DISPLAY("zstd: %s already exists; overwrite (y/N) ? ", 350 dstFileName); 351 { int ch = getchar(); 352 if ((ch!='Y') && (ch!='y')) { 353 DISPLAY(" not overwritten \n"); 354 return NULL; 355 } 356 /* flush rest of input line */ 357 while ((ch!=EOF) && (ch!='\n')) ch = getchar(); 358 } } 359 /* need to unlink */ 360 FIO_remove(dstFileName); 361 } } 362 363 { FILE* const f = fopen( dstFileName, "wb" ); 364 if (f == NULL) 365 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); 366 return f; 367 } 368 } 369 370 371 /*! FIO_createDictBuffer() : 372 * creates a buffer, pointed by `*bufferPtr`, 373 * loads `filename` content into it, up to DICTSIZE_MAX bytes. 374 * @return : loaded size 375 * if fileName==NULL, returns 0 and a NULL pointer 376 */ 377 static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName) 378 { 379 FILE* fileHandle; 380 U64 fileSize; 381 382 assert(bufferPtr != NULL); 383 *bufferPtr = NULL; 384 if (fileName == NULL) return 0; 385 386 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); 387 fileHandle = fopen(fileName, "rb"); 388 if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno)); 389 fileSize = UTIL_getFileSize(fileName); 390 if (fileSize > DICTSIZE_MAX) { 391 EXM_THROW(32, "Dictionary file %s is too large (> %u MB)", 392 fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */ 393 } 394 *bufferPtr = malloc((size_t)fileSize); 395 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); 396 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); 397 if (readSize!=fileSize) 398 EXM_THROW(35, "Error reading dictionary file %s", fileName); 399 } 400 fclose(fileHandle); 401 return (size_t)fileSize; 402 } 403 404 #ifndef ZSTD_NOCOMPRESS 405 406 /* ********************************************************************** 407 * Compression 408 ************************************************************************/ 409 typedef struct { 410 FILE* srcFile; 411 FILE* dstFile; 412 void* srcBuffer; 413 size_t srcBufferSize; 414 void* dstBuffer; 415 size_t dstBufferSize; 416 ZSTD_CStream* cctx; 417 } cRess_t; 418 419 static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, 420 U64 srcSize, 421 ZSTD_compressionParameters* comprParams) { 422 cRess_t ress; 423 memset(&ress, 0, sizeof(ress)); 424 425 DISPLAYLEVEL(6, "FIO_createCResources \n"); 426 ress.cctx = ZSTD_createCCtx(); 427 if (ress.cctx == NULL) 428 EXM_THROW(30, "allocation error : can't create ZSTD_CCtx"); 429 ress.srcBufferSize = ZSTD_CStreamInSize(); 430 ress.srcBuffer = malloc(ress.srcBufferSize); 431 ress.dstBufferSize = ZSTD_CStreamOutSize(); 432 ress.dstBuffer = malloc(ress.dstBufferSize); 433 if (!ress.srcBuffer || !ress.dstBuffer) 434 EXM_THROW(31, "allocation error : not enough memory"); 435 436 /* Advanced parameters, including dictionary */ 437 { void* dictBuffer; 438 size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */ 439 if (dictFileName && (dictBuffer==NULL)) 440 EXM_THROW(32, "allocation error : can't create dictBuffer"); 441 442 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */ 443 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) ); 444 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) ); 445 /* compression level */ 446 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)cLevel) ); 447 /* long distance matching */ 448 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) ); 449 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) ); 450 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) ); 451 if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { 452 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) ); 453 } 454 if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) { 455 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) ); 456 } 457 /* compression parameters */ 458 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) ); 459 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) ); 460 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) ); 461 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_searchLog, comprParams->searchLog) ); 462 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) ); 463 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) ); 464 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) ); 465 /* multi-threading */ 466 #ifdef ZSTD_MULTITHREAD 467 DISPLAYLEVEL(5,"set nb workers = %u \n", g_nbWorkers); 468 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbWorkers, g_nbWorkers) ); 469 #endif 470 /* dictionary */ 471 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ 472 CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); 473 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ 474 475 free(dictBuffer); 476 } 477 478 return ress; 479 } 480 481 static void FIO_freeCResources(cRess_t ress) 482 { 483 free(ress.srcBuffer); 484 free(ress.dstBuffer); 485 ZSTD_freeCStream(ress.cctx); /* never fails */ 486 } 487 488 489 #ifdef ZSTD_GZCOMPRESS 490 static unsigned long long FIO_compressGzFrame(cRess_t* ress, 491 const char* srcFileName, U64 const srcFileSize, 492 int compressionLevel, U64* readsize) 493 { 494 unsigned long long inFileSize = 0, outFileSize = 0; 495 z_stream strm; 496 int ret; 497 498 if (compressionLevel > Z_BEST_COMPRESSION) 499 compressionLevel = Z_BEST_COMPRESSION; 500 501 strm.zalloc = Z_NULL; 502 strm.zfree = Z_NULL; 503 strm.opaque = Z_NULL; 504 505 ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, 506 15 /* maxWindowLogSize */ + 16 /* gzip only */, 507 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ 508 if (ret != Z_OK) 509 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); 510 511 strm.next_in = 0; 512 strm.avail_in = 0; 513 strm.next_out = (Bytef*)ress->dstBuffer; 514 strm.avail_out = (uInt)ress->dstBufferSize; 515 516 while (1) { 517 if (strm.avail_in == 0) { 518 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 519 if (inSize == 0) break; 520 inFileSize += inSize; 521 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 522 strm.avail_in = (uInt)inSize; 523 } 524 ret = deflate(&strm, Z_NO_FLUSH); 525 if (ret != Z_OK) 526 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); 527 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 528 if (decompBytes) { 529 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) 530 EXM_THROW(73, "Write error : cannot write to output file"); 531 outFileSize += decompBytes; 532 strm.next_out = (Bytef*)ress->dstBuffer; 533 strm.avail_out = (uInt)ress->dstBufferSize; 534 } 535 } 536 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 537 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 538 (U32)(inFileSize>>20), 539 (double)outFileSize/inFileSize*100) 540 else 541 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 542 (U32)(inFileSize>>20), (U32)(srcFileSize>>20), 543 (double)outFileSize/inFileSize*100); 544 } 545 546 while (1) { 547 ret = deflate(&strm, Z_FINISH); 548 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 549 if (decompBytes) { 550 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) 551 EXM_THROW(75, "Write error : cannot write to output file"); 552 outFileSize += decompBytes; 553 strm.next_out = (Bytef*)ress->dstBuffer; 554 strm.avail_out = (uInt)ress->dstBufferSize; 555 } } 556 if (ret == Z_STREAM_END) break; 557 if (ret != Z_BUF_ERROR) 558 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); 559 } 560 561 ret = deflateEnd(&strm); 562 if (ret != Z_OK) 563 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); 564 *readsize = inFileSize; 565 566 return outFileSize; 567 } 568 #endif 569 570 571 #ifdef ZSTD_LZMACOMPRESS 572 static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, 573 const char* srcFileName, U64 const srcFileSize, 574 int compressionLevel, U64* readsize, int plain_lzma) 575 { 576 unsigned long long inFileSize = 0, outFileSize = 0; 577 lzma_stream strm = LZMA_STREAM_INIT; 578 lzma_action action = LZMA_RUN; 579 lzma_ret ret; 580 581 if (compressionLevel < 0) compressionLevel = 0; 582 if (compressionLevel > 9) compressionLevel = 9; 583 584 if (plain_lzma) { 585 lzma_options_lzma opt_lzma; 586 if (lzma_lzma_preset(&opt_lzma, compressionLevel)) 587 EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName); 588 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ 589 if (ret != LZMA_OK) 590 EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); 591 } else { 592 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ 593 if (ret != LZMA_OK) 594 EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); 595 } 596 597 strm.next_in = 0; 598 strm.avail_in = 0; 599 strm.next_out = (BYTE*)ress->dstBuffer; 600 strm.avail_out = ress->dstBufferSize; 601 602 while (1) { 603 if (strm.avail_in == 0) { 604 size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); 605 if (inSize == 0) action = LZMA_FINISH; 606 inFileSize += inSize; 607 strm.next_in = (BYTE const*)ress->srcBuffer; 608 strm.avail_in = inSize; 609 } 610 611 ret = lzma_code(&strm, action); 612 613 if (ret != LZMA_OK && ret != LZMA_STREAM_END) 614 EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); 615 { size_t const compBytes = ress->dstBufferSize - strm.avail_out; 616 if (compBytes) { 617 if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) 618 EXM_THROW(73, "Write error : cannot write to output file"); 619 outFileSize += compBytes; 620 strm.next_out = (BYTE*)ress->dstBuffer; 621 strm.avail_out = ress->dstBufferSize; 622 } } 623 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 624 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 625 (U32)(inFileSize>>20), 626 (double)outFileSize/inFileSize*100) 627 else 628 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 629 (U32)(inFileSize>>20), (U32)(srcFileSize>>20), 630 (double)outFileSize/inFileSize*100); 631 if (ret == LZMA_STREAM_END) break; 632 } 633 634 lzma_end(&strm); 635 *readsize = inFileSize; 636 637 return outFileSize; 638 } 639 #endif 640 641 #ifdef ZSTD_LZ4COMPRESS 642 #if LZ4_VERSION_NUMBER <= 10600 643 #define LZ4F_blockLinked blockLinked 644 #define LZ4F_max64KB max64KB 645 #endif 646 static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } 647 static unsigned long long FIO_compressLz4Frame(cRess_t* ress, 648 const char* srcFileName, U64 const srcFileSize, 649 int compressionLevel, U64* readsize) 650 { 651 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB); 652 unsigned long long inFileSize = 0, outFileSize = 0; 653 654 LZ4F_preferences_t prefs; 655 LZ4F_compressionContext_t ctx; 656 657 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); 658 if (LZ4F_isError(errorCode)) 659 EXM_THROW(31, "zstd: failed to create lz4 compression context"); 660 661 memset(&prefs, 0, sizeof(prefs)); 662 663 assert(blockSize <= ress->srcBufferSize); 664 665 prefs.autoFlush = 1; 666 prefs.compressionLevel = compressionLevel; 667 prefs.frameInfo.blockMode = LZ4F_blockLinked; 668 prefs.frameInfo.blockSizeID = LZ4F_max64KB; 669 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag; 670 #if LZ4_VERSION_NUMBER >= 10600 671 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; 672 #endif 673 assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); 674 675 { 676 size_t readSize; 677 size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); 678 if (LZ4F_isError(headerSize)) 679 EXM_THROW(33, "File header generation failed : %s", 680 LZ4F_getErrorName(headerSize)); 681 if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) 682 EXM_THROW(34, "Write error : cannot write header"); 683 outFileSize += headerSize; 684 685 /* Read first block */ 686 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); 687 inFileSize += readSize; 688 689 /* Main Loop */ 690 while (readSize>0) { 691 size_t outSize; 692 693 /* Compress Block */ 694 outSize = LZ4F_compressUpdate(ctx, ress->dstBuffer, ress->dstBufferSize, ress->srcBuffer, readSize, NULL); 695 if (LZ4F_isError(outSize)) 696 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", 697 srcFileName, LZ4F_getErrorName(outSize)); 698 outFileSize += outSize; 699 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 700 DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", 701 (U32)(inFileSize>>20), 702 (double)outFileSize/inFileSize*100) 703 else 704 DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", 705 (U32)(inFileSize>>20), (U32)(srcFileSize>>20), 706 (double)outFileSize/inFileSize*100); 707 708 /* Write Block */ 709 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); 710 if (sizeCheck!=outSize) EXM_THROW(36, "Write error : cannot write compressed block"); } 711 712 /* Read next block */ 713 readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); 714 inFileSize += readSize; 715 } 716 if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); 717 718 /* End of Stream mark */ 719 headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); 720 if (LZ4F_isError(headerSize)) 721 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", 722 srcFileName, LZ4F_getErrorName(headerSize)); 723 724 { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); 725 if (sizeCheck!=headerSize) EXM_THROW(39, "Write error : cannot write end of stream"); } 726 outFileSize += headerSize; 727 } 728 729 *readsize = inFileSize; 730 LZ4F_freeCompressionContext(ctx); 731 732 return outFileSize; 733 } 734 #endif 735 736 737 /*! FIO_compressFilename_internal() : 738 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. 739 * @return : 0 : compression completed correctly, 740 * 1 : missing or pb opening srcFileName 741 */ 742 static unsigned long long 743 FIO_compressZstdFrame(const cRess_t* ressPtr, 744 const char* srcFileName, U64 fileSize, 745 int compressionLevel, U64* readsize) 746 { 747 cRess_t const ress = *ressPtr; 748 FILE* const srcFile = ress.srcFile; 749 FILE* const dstFile = ress.dstFile; 750 U64 compressedfilesize = 0; 751 ZSTD_EndDirective directive = ZSTD_e_continue; 752 DISPLAYLEVEL(6, "compression using zstd format \n"); 753 754 /* init */ 755 if (fileSize != UTIL_FILESIZE_UNKNOWN) 756 ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize); 757 (void)compressionLevel; (void)srcFileName; 758 759 /* Main compression loop */ 760 do { 761 size_t result; 762 /* Fill input Buffer */ 763 size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); 764 ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; 765 DISPLAYLEVEL(6, "fread %u bytes from source \n", (U32)inSize); 766 *readsize += inSize; 767 768 if ((inSize == 0) || (*readsize == fileSize)) 769 directive = ZSTD_e_end; 770 771 result = 1; 772 while (inBuff.pos != inBuff.size || (directive == ZSTD_e_end && result != 0)) { 773 ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; 774 CHECK_V(result, ZSTD_compress_generic(ress.cctx, &outBuff, &inBuff, directive)); 775 776 /* Write compressed stream */ 777 DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => intput pos(%u)<=(%u)size ; output generated %u bytes \n", 778 (U32)directive, (U32)inBuff.pos, (U32)inBuff.size, (U32)outBuff.pos); 779 if (outBuff.pos) { 780 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); 781 if (sizeCheck!=outBuff.pos) 782 EXM_THROW(25, "Write error : cannot write compressed block"); 783 compressedfilesize += outBuff.pos; 784 } 785 if (READY_FOR_UPDATE()) { 786 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); 787 double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; 788 if (g_displayLevel >= 3) { 789 DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%%", 790 compressionLevel, 791 (U32)((zfp.ingested - zfp.consumed) >> 20), 792 (U32)(zfp.consumed >> 20), 793 (U32)(zfp.produced >> 20), 794 cShare ); 795 } else { /* g_displayLevel == 2 */ 796 DISPLAYLEVEL(2, "\rRead : %u ", (U32)(zfp.consumed >> 20)); 797 if (fileSize != UTIL_FILESIZE_UNKNOWN) 798 DISPLAYLEVEL(2, "/ %u ", (U32)(fileSize >> 20)); 799 DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare); 800 DELAY_NEXT_UPDATE(); 801 } 802 } 803 } 804 } while (directive != ZSTD_e_end); 805 806 return compressedfilesize; 807 } 808 809 /*! FIO_compressFilename_internal() : 810 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. 811 * @return : 0 : compression completed correctly, 812 * 1 : missing or pb opening srcFileName 813 */ 814 static int 815 FIO_compressFilename_internal(cRess_t ress, 816 const char* dstFileName, const char* srcFileName, 817 int compressionLevel) 818 { 819 U64 readsize = 0; 820 U64 compressedfilesize = 0; 821 U64 const fileSize = UTIL_getFileSize(srcFileName); 822 DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (U32)fileSize); 823 824 /* compression format selection */ 825 switch (g_compressionType) { 826 default: 827 case FIO_zstdCompression: 828 compressedfilesize = FIO_compressZstdFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 829 break; 830 831 case FIO_gzipCompression: 832 #ifdef ZSTD_GZCOMPRESS 833 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 834 #else 835 (void)compressionLevel; 836 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", 837 srcFileName); 838 #endif 839 break; 840 841 case FIO_xzCompression: 842 case FIO_lzmaCompression: 843 #ifdef ZSTD_LZMACOMPRESS 844 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression); 845 #else 846 (void)compressionLevel; 847 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", 848 srcFileName); 849 #endif 850 break; 851 852 case FIO_lz4Compression: 853 #ifdef ZSTD_LZ4COMPRESS 854 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, &readsize); 855 #else 856 (void)compressionLevel; 857 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", 858 srcFileName); 859 #endif 860 break; 861 } 862 863 /* Status */ 864 DISPLAYLEVEL(2, "\r%79s\r", ""); 865 DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", 866 srcFileName, 867 (double)compressedfilesize / (readsize+(!readsize)/*avoid div by zero*/) * 100, 868 (unsigned long long)readsize, (unsigned long long) compressedfilesize, 869 dstFileName); 870 871 return 0; 872 } 873 874 875 /*! FIO_compressFilename_srcFile() : 876 * note : ress.destFile already opened 877 * @return : 0 : compression completed correctly, 878 * 1 : missing or pb opening srcFileName 879 */ 880 static int FIO_compressFilename_srcFile(cRess_t ress, 881 const char* dstFileName, const char* srcFileName, 882 int compressionLevel) 883 { 884 int result; 885 886 /* File check */ 887 if (UTIL_isDirectory(srcFileName)) { 888 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 889 return 1; 890 } 891 892 ress.srcFile = FIO_openSrcFile(srcFileName); 893 if (!ress.srcFile) return 1; /* srcFile could not be opened */ 894 895 result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, compressionLevel); 896 897 fclose(ress.srcFile); 898 if (g_removeSrcFile /* --rm */ && !result && strcmp(srcFileName, stdinmark)) { 899 /* We must clear the handler, since after this point calling it would 900 * delete both the source and destination files. 901 */ 902 clearHandler(); 903 if (FIO_remove(srcFileName)) 904 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); 905 } 906 return result; 907 } 908 909 910 /*! FIO_compressFilename_dstFile() : 911 * @return : 0 : compression completed correctly, 912 * 1 : pb 913 */ 914 static int FIO_compressFilename_dstFile(cRess_t ress, 915 const char* dstFileName, 916 const char* srcFileName, 917 int compressionLevel) 918 { 919 int result; 920 stat_t statbuf; 921 int stat_result = 0; 922 923 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName); 924 ress.dstFile = FIO_openDstFile(dstFileName); 925 if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ 926 /* Must ony be added after FIO_openDstFile() succeeds. 927 * Otherwise we may delete the destination file if at already exists, and 928 * the user presses Ctrl-C when asked if they wish to overwrite. 929 */ 930 addHandler(dstFileName); 931 932 if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf)) 933 stat_result = 1; 934 result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel); 935 clearHandler(); 936 937 if (fclose(ress.dstFile)) { /* error closing dstFile */ 938 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 939 result=1; 940 } 941 if ( (result != 0) /* operation failure */ 942 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */ 943 && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */ 944 FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ 945 else if ( strcmp(dstFileName, stdoutmark) 946 && strcmp(dstFileName, nulmark) 947 && stat_result) 948 UTIL_setFileStat(dstFileName, &statbuf); 949 950 return result; 951 } 952 953 954 int FIO_compressFilename(const char* dstFileName, const char* srcFileName, 955 const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams) 956 { 957 clock_t const start = clock(); 958 U64 const fileSize = UTIL_getFileSize(srcFileName); 959 U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize; 960 961 cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); 962 int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel); 963 964 double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC; 965 DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); 966 967 FIO_freeCResources(ress); 968 return result; 969 } 970 971 972 int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFiles, 973 const char* outFileName, const char* suffix, 974 const char* dictFileName, int compressionLevel, 975 ZSTD_compressionParameters* comprParams) 976 { 977 int missed_files = 0; 978 size_t dfnSize = FNSPACE; 979 char* dstFileName = (char*)malloc(FNSPACE); 980 size_t const suffixSize = suffix ? strlen(suffix) : 0; 981 U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); 982 U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; 983 U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ; 984 cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams); 985 986 /* init */ 987 if (dstFileName==NULL) 988 EXM_THROW(27, "FIO_compressMultipleFilenames : allocation error for dstFileName"); 989 if (outFileName == NULL && suffix == NULL) 990 EXM_THROW(28, "FIO_compressMultipleFilenames : dst unknown"); /* should never happen */ 991 992 /* loop on each file */ 993 if (outFileName != NULL) { 994 unsigned u; 995 ress.dstFile = FIO_openDstFile(outFileName); 996 if (ress.dstFile==NULL) { /* could not open outFileName */ 997 missed_files = nbFiles; 998 } else { 999 for (u=0; u<nbFiles; u++) 1000 missed_files += FIO_compressFilename_srcFile(ress, outFileName, inFileNamesTable[u], compressionLevel); 1001 if (fclose(ress.dstFile)) 1002 EXM_THROW(29, "Write error : cannot properly close stdout"); 1003 } 1004 } else { 1005 unsigned u; 1006 for (u=0; u<nbFiles; u++) { 1007 size_t const ifnSize = strlen(inFileNamesTable[u]); 1008 if (dfnSize <= ifnSize+suffixSize+1) { /* resize name buffer */ 1009 free(dstFileName); 1010 dfnSize = ifnSize + 20; 1011 dstFileName = (char*)malloc(dfnSize); 1012 if (!dstFileName) { 1013 EXM_THROW(30, "zstd: %s", strerror(errno)); 1014 } } 1015 strcpy(dstFileName, inFileNamesTable[u]); 1016 strcat(dstFileName, suffix); 1017 missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel); 1018 } } 1019 1020 FIO_freeCResources(ress); 1021 free(dstFileName); 1022 return missed_files; 1023 } 1024 1025 #endif /* #ifndef ZSTD_NOCOMPRESS */ 1026 1027 1028 1029 #ifndef ZSTD_NODECOMPRESS 1030 1031 /* ************************************************************************** 1032 * Decompression 1033 ***************************************************************************/ 1034 typedef struct { 1035 void* srcBuffer; 1036 size_t srcBufferSize; 1037 size_t srcBufferLoaded; 1038 void* dstBuffer; 1039 size_t dstBufferSize; 1040 ZSTD_DStream* dctx; 1041 FILE* dstFile; 1042 } dRess_t; 1043 1044 static dRess_t FIO_createDResources(const char* dictFileName) 1045 { 1046 dRess_t ress; 1047 memset(&ress, 0, sizeof(ress)); 1048 1049 /* Allocation */ 1050 ress.dctx = ZSTD_createDStream(); 1051 if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream"); 1052 CHECK( ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit) ); 1053 ress.srcBufferSize = ZSTD_DStreamInSize(); 1054 ress.srcBuffer = malloc(ress.srcBufferSize); 1055 ress.dstBufferSize = ZSTD_DStreamOutSize(); 1056 ress.dstBuffer = malloc(ress.dstBufferSize); 1057 if (!ress.srcBuffer || !ress.dstBuffer) 1058 EXM_THROW(61, "Allocation error : not enough memory"); 1059 1060 /* dictionary */ 1061 { void* dictBuffer; 1062 size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName); 1063 CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); 1064 free(dictBuffer); 1065 } 1066 1067 return ress; 1068 } 1069 1070 static void FIO_freeDResources(dRess_t ress) 1071 { 1072 CHECK( ZSTD_freeDStream(ress.dctx) ); 1073 free(ress.srcBuffer); 1074 free(ress.dstBuffer); 1075 } 1076 1077 1078 /** FIO_fwriteSparse() : 1079 * @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */ 1080 static unsigned FIO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips) 1081 { 1082 const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ 1083 size_t bufferSizeT = bufferSize / sizeof(size_t); 1084 const size_t* const bufferTEnd = bufferT + bufferSizeT; 1085 const size_t* ptrT = bufferT; 1086 static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */ 1087 1088 if (!g_sparseFileSupport) { /* normal write */ 1089 size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); 1090 if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block"); 1091 return 0; 1092 } 1093 1094 /* avoid int overflow */ 1095 if (storedSkips > 1 GB) { 1096 int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR); 1097 if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)"); 1098 storedSkips -= 1 GB; 1099 } 1100 1101 while (ptrT < bufferTEnd) { 1102 size_t seg0SizeT = segmentSizeT; 1103 size_t nb0T; 1104 1105 /* count leading zeros */ 1106 if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; 1107 bufferSizeT -= seg0SizeT; 1108 for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; 1109 storedSkips += (unsigned)(nb0T * sizeof(size_t)); 1110 1111 if (nb0T != seg0SizeT) { /* not all 0s */ 1112 int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); 1113 if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse"); 1114 storedSkips = 0; 1115 seg0SizeT -= nb0T; 1116 ptrT += nb0T; 1117 { size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file); 1118 if (sizeCheck != seg0SizeT) 1119 EXM_THROW(73, "Write error : cannot write decoded block"); 1120 } } 1121 ptrT += seg0SizeT; 1122 } 1123 1124 { static size_t const maskT = sizeof(size_t)-1; 1125 if (bufferSize & maskT) { 1126 /* size not multiple of sizeof(size_t) : implies end of block */ 1127 const char* const restStart = (const char*)bufferTEnd; 1128 const char* restPtr = restStart; 1129 size_t restSize = bufferSize & maskT; 1130 const char* const restEnd = restStart + restSize; 1131 for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; 1132 storedSkips += (unsigned) (restPtr - restStart); 1133 if (restPtr != restEnd) { 1134 int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); 1135 if (seekResult) 1136 EXM_THROW(74, "Sparse skip error ; try --no-sparse"); 1137 storedSkips = 0; 1138 { size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file); 1139 if (sizeCheck != (size_t)(restEnd - restPtr)) 1140 EXM_THROW(75, "Write error : cannot write decoded end of block"); 1141 } } } } 1142 1143 return storedSkips; 1144 } 1145 1146 static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips) 1147 { 1148 if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */ 1149 int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR); 1150 if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)"); 1151 { const char lastZeroByte[1] = { 0 }; 1152 size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file); 1153 if (sizeCheck != 1) 1154 EXM_THROW(69, "Write error : cannot write last zero"); 1155 } } 1156 } 1157 1158 1159 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode 1160 @return : 0 (no error) */ 1161 static unsigned FIO_passThrough(FILE* foutput, FILE* finput, void* buffer, size_t bufferSize, size_t alreadyLoaded) 1162 { 1163 size_t const blockSize = MIN(64 KB, bufferSize); 1164 size_t readFromInput = 1; 1165 unsigned storedSkips = 0; 1166 1167 /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ 1168 { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); 1169 if (sizeCheck != alreadyLoaded) { 1170 DISPLAYLEVEL(1, "Pass-through write error \n"); 1171 return 1; 1172 } } 1173 1174 while (readFromInput) { 1175 readFromInput = fread(buffer, 1, blockSize, finput); 1176 storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, storedSkips); 1177 } 1178 1179 FIO_fwriteSparseEnd(foutput, storedSkips); 1180 return 0; 1181 } 1182 1183 /* FIO_highbit64() : 1184 * gives position of highest bit. 1185 * note : only works for v > 0 ! 1186 */ 1187 static unsigned FIO_highbit64(unsigned long long v) 1188 { 1189 unsigned count = 0; 1190 assert(v != 0); 1191 v >>= 1; 1192 while (v) { v >>= 1; count++; } 1193 return count; 1194 } 1195 1196 /* FIO_zstdErrorHelp() : 1197 * detailed error message when requested window size is too large */ 1198 static void FIO_zstdErrorHelp(dRess_t* ress, size_t err, char const* srcFileName) 1199 { 1200 ZSTD_frameHeader header; 1201 1202 /* Help message only for one specific error */ 1203 if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge) 1204 return; 1205 1206 /* Try to decode the frame header */ 1207 err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded); 1208 if (err == 0) { 1209 unsigned long long const windowSize = header.windowSize; 1210 U32 const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0); 1211 U32 const windowMB = (U32)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0)); 1212 assert(windowSize < (U64)(1ULL << 52)); 1213 assert(g_memLimit > 0); 1214 DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n", 1215 srcFileName, windowSize, g_memLimit); 1216 if (windowLog <= ZSTD_WINDOWLOG_MAX) { 1217 DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n", 1218 srcFileName, windowLog, windowMB); 1219 return; 1220 } 1221 } 1222 DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported\n", 1223 srcFileName, ZSTD_WINDOWLOG_MAX); 1224 } 1225 1226 /** FIO_decompressFrame() : 1227 * @return : size of decoded zstd frame, or an error code 1228 */ 1229 #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) 1230 unsigned long long FIO_decompressZstdFrame(dRess_t* ress, 1231 FILE* finput, 1232 const char* srcFileName, 1233 U64 alreadyDecoded) 1234 { 1235 U64 frameSize = 0; 1236 U32 storedSkips = 0; 1237 1238 size_t const srcFileLength = strlen(srcFileName); 1239 if (srcFileLength>20) srcFileName += srcFileLength-20; /* display last 20 characters only */ 1240 1241 ZSTD_resetDStream(ress->dctx); 1242 1243 /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ 1244 { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; 1245 if (ress->srcBufferLoaded < toDecode) { 1246 size_t const toRead = toDecode - ress->srcBufferLoaded; 1247 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; 1248 ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); 1249 } } 1250 1251 /* Main decompression Loop */ 1252 while (1) { 1253 ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; 1254 ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 }; 1255 size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); 1256 if (ZSTD_isError(readSizeHint)) { 1257 DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", 1258 srcFileName, ZSTD_getErrorName(readSizeHint)); 1259 FIO_zstdErrorHelp(ress, readSizeHint, srcFileName); 1260 return FIO_ERROR_FRAME_DECODING; 1261 } 1262 1263 /* Write block */ 1264 storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips); 1265 frameSize += outBuff.pos; 1266 DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ", 1267 srcFileName, (U32)((alreadyDecoded+frameSize)>>20) ); 1268 1269 if (inBuff.pos > 0) { 1270 memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); 1271 ress->srcBufferLoaded -= inBuff.pos; 1272 } 1273 1274 if (readSizeHint == 0) break; /* end of frame */ 1275 if (inBuff.size != inBuff.pos) { 1276 DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n", 1277 srcFileName); 1278 return FIO_ERROR_FRAME_DECODING; 1279 } 1280 1281 /* Fill input buffer */ 1282 { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ 1283 if (ress->srcBufferLoaded < toDecode) { 1284 size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ 1285 void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; 1286 size_t const readSize = fread(startPosition, 1, toRead, finput); 1287 if (readSize==0) { 1288 DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", 1289 srcFileName); 1290 return FIO_ERROR_FRAME_DECODING; 1291 } 1292 ress->srcBufferLoaded += readSize; 1293 } } } 1294 1295 FIO_fwriteSparseEnd(ress->dstFile, storedSkips); 1296 1297 return frameSize; 1298 } 1299 1300 1301 #ifdef ZSTD_GZDECOMPRESS 1302 static unsigned long long FIO_decompressGzFrame(dRess_t* ress, 1303 FILE* srcFile, const char* srcFileName) 1304 { 1305 unsigned long long outFileSize = 0; 1306 z_stream strm; 1307 int flush = Z_NO_FLUSH; 1308 int decodingError = 0; 1309 1310 strm.zalloc = Z_NULL; 1311 strm.zfree = Z_NULL; 1312 strm.opaque = Z_NULL; 1313 strm.next_in = 0; 1314 strm.avail_in = 0; 1315 /* see http://www.zlib.net/manual.html */ 1316 if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) 1317 return FIO_ERROR_FRAME_DECODING; 1318 1319 strm.next_out = (Bytef*)ress->dstBuffer; 1320 strm.avail_out = (uInt)ress->dstBufferSize; 1321 strm.avail_in = (uInt)ress->srcBufferLoaded; 1322 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 1323 1324 for ( ; ; ) { 1325 int ret; 1326 if (strm.avail_in == 0) { 1327 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 1328 if (ress->srcBufferLoaded == 0) flush = Z_FINISH; 1329 strm.next_in = (z_const unsigned char*)ress->srcBuffer; 1330 strm.avail_in = (uInt)ress->srcBufferLoaded; 1331 } 1332 ret = inflate(&strm, flush); 1333 if (ret == Z_BUF_ERROR) { 1334 DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); 1335 decodingError = 1; break; 1336 } 1337 if (ret != Z_OK && ret != Z_STREAM_END) { 1338 DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); 1339 decodingError = 1; break; 1340 } 1341 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 1342 if (decompBytes) { 1343 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { 1344 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1345 decodingError = 1; break; 1346 } 1347 outFileSize += decompBytes; 1348 strm.next_out = (Bytef*)ress->dstBuffer; 1349 strm.avail_out = (uInt)ress->dstBufferSize; 1350 } 1351 } 1352 if (ret == Z_STREAM_END) break; 1353 } 1354 1355 if (strm.avail_in > 0) 1356 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 1357 ress->srcBufferLoaded = strm.avail_in; 1358 if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ 1359 && (decodingError==0) ) { 1360 DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); 1361 decodingError = 1; 1362 } 1363 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; 1364 } 1365 #endif 1366 1367 1368 #ifdef ZSTD_LZMADECOMPRESS 1369 static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma) 1370 { 1371 unsigned long long outFileSize = 0; 1372 lzma_stream strm = LZMA_STREAM_INIT; 1373 lzma_action action = LZMA_RUN; 1374 lzma_ret initRet; 1375 int decodingError = 0; 1376 1377 strm.next_in = 0; 1378 strm.avail_in = 0; 1379 if (plain_lzma) { 1380 initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ 1381 } else { 1382 initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ 1383 } 1384 1385 if (initRet != LZMA_OK) { 1386 DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", 1387 plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder", 1388 srcFileName, initRet); 1389 return FIO_ERROR_FRAME_DECODING; 1390 } 1391 1392 strm.next_out = (BYTE*)ress->dstBuffer; 1393 strm.avail_out = ress->dstBufferSize; 1394 strm.next_in = (BYTE const*)ress->srcBuffer; 1395 strm.avail_in = ress->srcBufferLoaded; 1396 1397 for ( ; ; ) { 1398 lzma_ret ret; 1399 if (strm.avail_in == 0) { 1400 ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); 1401 if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; 1402 strm.next_in = (BYTE const*)ress->srcBuffer; 1403 strm.avail_in = ress->srcBufferLoaded; 1404 } 1405 ret = lzma_code(&strm, action); 1406 1407 if (ret == LZMA_BUF_ERROR) { 1408 DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); 1409 decodingError = 1; break; 1410 } 1411 if (ret != LZMA_OK && ret != LZMA_STREAM_END) { 1412 DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", 1413 srcFileName, ret); 1414 decodingError = 1; break; 1415 } 1416 { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; 1417 if (decompBytes) { 1418 if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) { 1419 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1420 decodingError = 1; break; 1421 } 1422 outFileSize += decompBytes; 1423 strm.next_out = (BYTE*)ress->dstBuffer; 1424 strm.avail_out = ress->dstBufferSize; 1425 } } 1426 if (ret == LZMA_STREAM_END) break; 1427 } 1428 1429 if (strm.avail_in > 0) 1430 memmove(ress->srcBuffer, strm.next_in, strm.avail_in); 1431 ress->srcBufferLoaded = strm.avail_in; 1432 lzma_end(&strm); 1433 return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; 1434 } 1435 #endif 1436 1437 #ifdef ZSTD_LZ4DECOMPRESS 1438 static unsigned long long FIO_decompressLz4Frame(dRess_t* ress, 1439 FILE* srcFile, const char* srcFileName) 1440 { 1441 unsigned long long filesize = 0; 1442 LZ4F_errorCode_t nextToLoad; 1443 LZ4F_decompressionContext_t dCtx; 1444 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); 1445 int decodingError = 0; 1446 1447 if (LZ4F_isError(errorCode)) { 1448 DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); 1449 return FIO_ERROR_FRAME_DECODING; 1450 } 1451 1452 /* Init feed with magic number (already consumed from FILE* sFile) */ 1453 { size_t inSize = 4; 1454 size_t outSize= 0; 1455 MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); 1456 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); 1457 if (LZ4F_isError(nextToLoad)) { 1458 DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", 1459 srcFileName, LZ4F_getErrorName(nextToLoad)); 1460 LZ4F_freeDecompressionContext(dCtx); 1461 return FIO_ERROR_FRAME_DECODING; 1462 } } 1463 1464 /* Main Loop */ 1465 for (;nextToLoad;) { 1466 size_t readSize; 1467 size_t pos = 0; 1468 size_t decodedBytes = ress->dstBufferSize; 1469 1470 /* Read input */ 1471 if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; 1472 readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); 1473 if (!readSize) break; /* reached end of file or stream */ 1474 1475 while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ 1476 /* Decode Input (at least partially) */ 1477 size_t remaining = readSize - pos; 1478 decodedBytes = ress->dstBufferSize; 1479 nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); 1480 if (LZ4F_isError(nextToLoad)) { 1481 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", 1482 srcFileName, LZ4F_getErrorName(nextToLoad)); 1483 decodingError = 1; break; 1484 } 1485 pos += remaining; 1486 1487 /* Write Block */ 1488 if (decodedBytes) { 1489 if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) { 1490 DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno)); 1491 decodingError = 1; break; 1492 } 1493 filesize += decodedBytes; 1494 DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20)); 1495 } 1496 1497 if (!nextToLoad) break; 1498 } 1499 } 1500 /* can be out because readSize == 0, which could be an fread() error */ 1501 if (ferror(srcFile)) { 1502 DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); 1503 decodingError=1; 1504 } 1505 1506 if (nextToLoad!=0) { 1507 DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); 1508 decodingError=1; 1509 } 1510 1511 LZ4F_freeDecompressionContext(dCtx); 1512 ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ 1513 1514 return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; 1515 } 1516 #endif 1517 1518 1519 1520 /** FIO_decompressFrames() : 1521 * Find and decode frames inside srcFile 1522 * srcFile presumed opened and valid 1523 * @return : 0 : OK 1524 * 1 : error 1525 */ 1526 static int FIO_decompressFrames(dRess_t ress, FILE* srcFile, 1527 const char* dstFileName, const char* srcFileName) 1528 { 1529 unsigned readSomething = 0; 1530 unsigned long long filesize = 0; 1531 assert(srcFile != NULL); 1532 1533 /* for each frame */ 1534 for ( ; ; ) { 1535 /* check magic number -> version */ 1536 size_t const toRead = 4; 1537 const BYTE* const buf = (const BYTE*)ress.srcBuffer; 1538 if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ 1539 ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, 1540 (size_t)1, toRead - ress.srcBufferLoaded, srcFile); 1541 if (ress.srcBufferLoaded==0) { 1542 if (readSomething==0) { /* srcFile is empty (which is invalid) */ 1543 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); 1544 return 1; 1545 } /* else, just reached frame boundary */ 1546 break; /* no more input */ 1547 } 1548 readSomething = 1; /* there is at least 1 byte in srcFile */ 1549 if (ress.srcBufferLoaded < toRead) { 1550 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); 1551 return 1; 1552 } 1553 if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { 1554 unsigned long long const frameSize = FIO_decompressZstdFrame(&ress, srcFile, srcFileName, filesize); 1555 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1556 filesize += frameSize; 1557 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ 1558 #ifdef ZSTD_GZDECOMPRESS 1559 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName); 1560 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1561 filesize += frameSize; 1562 #else 1563 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); 1564 return 1; 1565 #endif 1566 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ 1567 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ 1568 #ifdef ZSTD_LZMADECOMPRESS 1569 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD); 1570 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1571 filesize += frameSize; 1572 #else 1573 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); 1574 return 1; 1575 #endif 1576 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { 1577 #ifdef ZSTD_LZ4DECOMPRESS 1578 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName); 1579 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; 1580 filesize += frameSize; 1581 #else 1582 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); 1583 return 1; 1584 #endif 1585 } else if ((g_overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ 1586 return FIO_passThrough(ress.dstFile, srcFile, 1587 ress.srcBuffer, ress.srcBufferSize, ress.srcBufferLoaded); 1588 } else { 1589 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); 1590 return 1; 1591 } } /* for each frame */ 1592 1593 /* Final Status */ 1594 DISPLAYLEVEL(2, "\r%79s\r", ""); 1595 DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize); 1596 1597 return 0; 1598 } 1599 1600 1601 /** FIO_decompressSrcFile() : 1602 Decompression `srcFileName` into `ress.dstFile` 1603 @return : 0 : OK 1604 1 : operation not started 1605 */ 1606 static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const char* srcFileName) 1607 { 1608 FILE* srcFile; 1609 int result; 1610 1611 if (UTIL_isDirectory(srcFileName)) { 1612 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); 1613 return 1; 1614 } 1615 1616 srcFile = FIO_openSrcFile(srcFileName); 1617 if (srcFile==NULL) return 1; 1618 1619 result = FIO_decompressFrames(ress, srcFile, dstFileName, srcFileName); 1620 1621 /* Close file */ 1622 if (fclose(srcFile)) { 1623 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ 1624 return 1; 1625 } 1626 if ( g_removeSrcFile /* --rm */ 1627 && (result==0) /* decompression successful */ 1628 && strcmp(srcFileName, stdinmark) ) /* not stdin */ { 1629 /* We must clear the handler, since after this point calling it would 1630 * delete both the source and destination files. 1631 */ 1632 clearHandler(); 1633 if (FIO_remove(srcFileName)) { 1634 /* failed to remove src file */ 1635 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); 1636 return 1; 1637 } } 1638 return result; 1639 } 1640 1641 1642 /** FIO_decompressFile_extRess() : 1643 decompress `srcFileName` into `dstFileName` 1644 @return : 0 : OK 1645 1 : operation aborted (src not available, dst already taken, etc.) 1646 */ 1647 static int FIO_decompressDstFile(dRess_t ress, 1648 const char* dstFileName, const char* srcFileName) 1649 { 1650 int result; 1651 stat_t statbuf; 1652 int stat_result = 0; 1653 1654 ress.dstFile = FIO_openDstFile(dstFileName); 1655 if (ress.dstFile==0) return 1; 1656 /* Must ony be added after FIO_openDstFile() succeeds. 1657 * Otherwise we may delete the destination file if at already exists, and 1658 * the user presses Ctrl-C when asked if they wish to overwrite. 1659 */ 1660 addHandler(dstFileName); 1661 1662 if ( strcmp(srcFileName, stdinmark) 1663 && UTIL_getFileStat(srcFileName, &statbuf) ) 1664 stat_result = 1; 1665 result = FIO_decompressSrcFile(ress, dstFileName, srcFileName); 1666 clearHandler(); 1667 1668 if (fclose(ress.dstFile)) { 1669 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); 1670 result = 1; 1671 } 1672 1673 if ( (result != 0) /* operation failure */ 1674 && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */ 1675 && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */ 1676 FIO_remove(dstFileName); /* remove decompression artefact; note don't do anything special if remove() fails */ 1677 else { /* operation success */ 1678 if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */ 1679 && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */ 1680 && stat_result ) /* file permissions correctly extracted from src */ 1681 UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */ 1682 } 1683 1684 signal(SIGINT, SIG_DFL); 1685 1686 return result; 1687 } 1688 1689 1690 int FIO_decompressFilename(const char* dstFileName, const char* srcFileName, 1691 const char* dictFileName) 1692 { 1693 dRess_t const ress = FIO_createDResources(dictFileName); 1694 1695 int const decodingError = FIO_decompressDstFile(ress, dstFileName, srcFileName); 1696 1697 FIO_freeDResources(ress); 1698 return decodingError; 1699 } 1700 1701 1702 #define MAXSUFFIXSIZE 8 1703 int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles, 1704 const char* outFileName, 1705 const char* dictFileName) 1706 { 1707 int skippedFiles = 0; 1708 int missingFiles = 0; 1709 dRess_t ress = FIO_createDResources(dictFileName); 1710 1711 if (outFileName) { 1712 unsigned u; 1713 ress.dstFile = FIO_openDstFile(outFileName); 1714 if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", outFileName); 1715 for (u=0; u<nbFiles; u++) 1716 missingFiles += FIO_decompressSrcFile(ress, outFileName, srcNamesTable[u]); 1717 if (fclose(ress.dstFile)) 1718 EXM_THROW(72, "Write error : cannot properly close output file"); 1719 } else { 1720 size_t suffixSize; 1721 size_t dfnSize = FNSPACE; 1722 unsigned u; 1723 char* dstFileName = (char*)malloc(FNSPACE); 1724 if (dstFileName==NULL) 1725 EXM_THROW(73, "not enough memory for dstFileName"); 1726 for (u=0; u<nbFiles; u++) { /* create dstFileName */ 1727 const char* const srcFileName = srcNamesTable[u]; 1728 const char* const suffixPtr = strrchr(srcFileName, '.'); 1729 size_t const sfnSize = strlen(srcFileName); 1730 if (!suffixPtr) { 1731 DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", 1732 srcFileName); 1733 skippedFiles++; 1734 continue; 1735 } 1736 suffixSize = strlen(suffixPtr); 1737 if (dfnSize+suffixSize <= sfnSize+1) { 1738 free(dstFileName); 1739 dfnSize = sfnSize + 20; 1740 dstFileName = (char*)malloc(dfnSize); 1741 if (dstFileName==NULL) 1742 EXM_THROW(74, "not enough memory for dstFileName"); 1743 } 1744 if (sfnSize <= suffixSize 1745 || (strcmp(suffixPtr, GZ_EXTENSION) 1746 && strcmp(suffixPtr, XZ_EXTENSION) 1747 && strcmp(suffixPtr, ZSTD_EXTENSION) 1748 && strcmp(suffixPtr, LZMA_EXTENSION) 1749 && strcmp(suffixPtr, LZ4_EXTENSION)) ) { 1750 DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s/%s expected) -- ignored \n", 1751 srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION); 1752 skippedFiles++; 1753 continue; 1754 } else { 1755 memcpy(dstFileName, srcFileName, sfnSize - suffixSize); 1756 dstFileName[sfnSize-suffixSize] = '\0'; 1757 } 1758 missingFiles += FIO_decompressDstFile(ress, dstFileName, srcFileName); 1759 } 1760 free(dstFileName); 1761 } 1762 1763 FIO_freeDResources(ress); 1764 return missingFiles + skippedFiles; 1765 } 1766 1767 1768 1769 /* ************************************************************************** 1770 * .zst file info (--list command) 1771 ***************************************************************************/ 1772 1773 typedef struct { 1774 U64 decompressedSize; 1775 U64 compressedSize; 1776 U64 windowSize; 1777 int numActualFrames; 1778 int numSkippableFrames; 1779 int decompUnavailable; 1780 int usesCheck; 1781 U32 nbFiles; 1782 } fileInfo_t; 1783 1784 /** getFileInfo() : 1785 * Reads information from file, stores in *info 1786 * @return : 0 if successful 1787 * 1 for frame analysis error 1788 * 2 for file not compressed with zstd 1789 * 3 for cases in which file could not be opened. 1790 */ 1791 static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){ 1792 int detectError = 0; 1793 FILE* const srcFile = FIO_openSrcFile(inFileName); 1794 if (srcFile == NULL) { 1795 DISPLAY("Error: could not open source file %s\n", inFileName); 1796 return 3; 1797 } 1798 info->compressedSize = UTIL_getFileSize(inFileName); 1799 1800 /* begin analyzing frame */ 1801 for ( ; ; ) { 1802 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 1803 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); 1804 if (numBytesRead < ZSTD_frameHeaderSize_min) { 1805 if ( feof(srcFile) 1806 && (numBytesRead == 0) 1807 && (info->compressedSize > 0) 1808 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { 1809 break; 1810 } 1811 else if (feof(srcFile)) { 1812 DISPLAY("Error: reached end of file with incomplete frame\n"); 1813 detectError = 2; 1814 break; 1815 } 1816 else { 1817 DISPLAY("Error: did not reach end of file but ran out of frames\n"); 1818 detectError = 1; 1819 break; 1820 } 1821 } 1822 { U32 const magicNumber = MEM_readLE32(headerBuffer); 1823 /* Zstandard frame */ 1824 if (magicNumber == ZSTD_MAGICNUMBER) { 1825 ZSTD_frameHeader header; 1826 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); 1827 if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) { 1828 info->decompUnavailable = 1; 1829 } else { 1830 info->decompressedSize += frameContentSize; 1831 } 1832 if (ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0) { 1833 DISPLAY("Error: could not decode frame header\n"); 1834 detectError = 1; 1835 break; 1836 } 1837 info->windowSize = header.windowSize; 1838 /* move to the end of the frame header */ 1839 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); 1840 if (ZSTD_isError(headerSize)) { 1841 DISPLAY("Error: could not determine frame header size\n"); 1842 detectError = 1; 1843 break; 1844 } 1845 { int const ret = fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR); 1846 if (ret != 0) { 1847 DISPLAY("Error: could not move to end of frame header\n"); 1848 detectError = 1; 1849 break; 1850 } } } 1851 1852 /* skip the rest of the blocks in the frame */ 1853 { int lastBlock = 0; 1854 do { 1855 BYTE blockHeaderBuffer[3]; 1856 size_t const readBytes = fread(blockHeaderBuffer, 1, 3, srcFile); 1857 if (readBytes != 3) { 1858 DISPLAY("There was a problem reading the block header\n"); 1859 detectError = 1; 1860 break; 1861 } 1862 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); 1863 U32 const blockTypeID = (blockHeader >> 1) & 3; 1864 U32 const isRLE = (blockTypeID == 1); 1865 U32 const isWrongBlock = (blockTypeID == 3); 1866 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); 1867 if (isWrongBlock) { 1868 DISPLAY("Error: unsupported block type \n"); 1869 detectError = 1; 1870 break; 1871 } 1872 lastBlock = blockHeader & 1; 1873 { int const ret = fseek(srcFile, blockSize, SEEK_CUR); 1874 if (ret != 0) { 1875 DISPLAY("Error: could not skip to end of block\n"); 1876 detectError = 1; 1877 break; 1878 } } } 1879 } while (lastBlock != 1); 1880 1881 if (detectError) break; 1882 } 1883 1884 /* check if checksum is used */ 1885 { BYTE const frameHeaderDescriptor = headerBuffer[4]; 1886 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; 1887 if (contentChecksumFlag) { 1888 int const ret = fseek(srcFile, 4, SEEK_CUR); 1889 info->usesCheck = 1; 1890 if (ret != 0) { 1891 DISPLAY("Error: could not skip past checksum\n"); 1892 detectError = 1; 1893 break; 1894 } } } 1895 info->numActualFrames++; 1896 } 1897 /* Skippable frame */ 1898 else if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { 1899 U32 const frameSize = MEM_readLE32(headerBuffer + 4); 1900 long const seek = (long)(8 + frameSize - numBytesRead); 1901 int const ret = LONG_SEEK(srcFile, seek, SEEK_CUR); 1902 if (ret != 0) { 1903 DISPLAY("Error: could not find end of skippable frame\n"); 1904 detectError = 1; 1905 break; 1906 } 1907 info->numSkippableFrames++; 1908 } 1909 /* unknown content */ 1910 else { 1911 detectError = 2; 1912 break; 1913 } 1914 } 1915 } /* end analyzing frame */ 1916 fclose(srcFile); 1917 info->nbFiles = 1; 1918 return detectError; 1919 } 1920 1921 static int getFileInfo(fileInfo_t* info, const char* srcFileName) 1922 { 1923 int const isAFile = UTIL_isRegularFile(srcFileName); 1924 if (!isAFile) { 1925 DISPLAY("Error : %s is not a file", srcFileName); 1926 return 3; 1927 } 1928 return getFileInfo_fileConfirmed(info, srcFileName); 1929 } 1930 1931 1932 static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){ 1933 unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB); 1934 const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB"; 1935 double const windowSizeUnit = (double)info->windowSize / unit; 1936 double const compressedSizeUnit = (double)info->compressedSize / unit; 1937 double const decompressedSizeUnit = (double)info->decompressedSize / unit; 1938 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize; 1939 const char* const checkString = (info->usesCheck ? "XXH64" : "None"); 1940 if (displayLevel <= 2) { 1941 if (!info->decompUnavailable) { 1942 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n", 1943 info->numSkippableFrames + info->numActualFrames, 1944 info->numSkippableFrames, 1945 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 1946 ratio, checkString, inFileName); 1947 } else { 1948 DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n", 1949 info->numSkippableFrames + info->numActualFrames, 1950 info->numSkippableFrames, 1951 compressedSizeUnit, unitStr, 1952 checkString, inFileName); 1953 } 1954 } else { 1955 DISPLAYOUT("%s \n", inFileName); 1956 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); 1957 if (info->numSkippableFrames) 1958 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); 1959 DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n", 1960 windowSizeUnit, unitStr, 1961 (unsigned long long)info->windowSize); 1962 DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n", 1963 compressedSizeUnit, unitStr, 1964 (unsigned long long)info->compressedSize); 1965 if (!info->decompUnavailable) { 1966 DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n", 1967 decompressedSizeUnit, unitStr, 1968 (unsigned long long)info->decompressedSize); 1969 DISPLAYOUT("Ratio: %.4f\n", ratio); 1970 } 1971 DISPLAYOUT("Check: %s\n", checkString); 1972 DISPLAYOUT("\n"); 1973 } 1974 } 1975 1976 static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) 1977 { 1978 fileInfo_t total; 1979 memset(&total, 0, sizeof(total)); 1980 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; 1981 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; 1982 total.compressedSize = fi1.compressedSize + fi2.compressedSize; 1983 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; 1984 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; 1985 total.usesCheck = fi1.usesCheck & fi2.usesCheck; 1986 total.nbFiles = fi1.nbFiles + fi2.nbFiles; 1987 return total; 1988 } 1989 1990 static int FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel){ 1991 fileInfo_t info; 1992 memset(&info, 0, sizeof(info)); 1993 { int const error = getFileInfo(&info, inFileName); 1994 if (error == 1) { 1995 /* display error, but provide output */ 1996 DISPLAY("An error occurred while getting file info \n"); 1997 } 1998 else if (error == 2) { 1999 DISPLAYOUT("File %s not compressed by zstd \n", inFileName); 2000 if (displayLevel > 2) DISPLAYOUT("\n"); 2001 return 1; 2002 } 2003 else if (error == 3) { 2004 /* error occurred while opening the file */ 2005 if (displayLevel > 2) DISPLAYOUT("\n"); 2006 return 1; 2007 } 2008 displayInfo(inFileName, &info, displayLevel); 2009 *total = FIO_addFInfo(*total, info); 2010 return error; 2011 } 2012 } 2013 2014 int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel){ 2015 if (numFiles == 0) { 2016 DISPLAYOUT("No files given\n"); 2017 return 0; 2018 } 2019 if (displayLevel <= 2) { 2020 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); 2021 } 2022 { int error = 0; 2023 unsigned u; 2024 fileInfo_t total; 2025 memset(&total, 0, sizeof(total)); 2026 total.usesCheck = 1; 2027 for (u=0; u<numFiles;u++) { 2028 error |= FIO_listFile(&total, filenameTable[u], displayLevel); 2029 } 2030 if (numFiles > 1 && displayLevel <= 2) { /* display total */ 2031 unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB); 2032 const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB"; 2033 double const compressedSizeUnit = (double)total.compressedSize / unit; 2034 double const decompressedSizeUnit = (double)total.decompressedSize / unit; 2035 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize; 2036 const char* const checkString = (total.usesCheck ? "XXH64" : ""); 2037 DISPLAYOUT("----------------------------------------------------------------- \n"); 2038 if (total.decompUnavailable) { 2039 DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n", 2040 total.numSkippableFrames + total.numActualFrames, 2041 total.numSkippableFrames, 2042 compressedSizeUnit, unitStr, 2043 checkString, total.nbFiles); 2044 } else { 2045 DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n", 2046 total.numSkippableFrames + total.numActualFrames, 2047 total.numSkippableFrames, 2048 compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr, 2049 ratio, checkString, total.nbFiles); 2050 } } 2051 return error; 2052 } 2053 } 2054 2055 2056 #endif /* #ifndef ZSTD_NODECOMPRESS */ 2057