1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /* ************************************** 12 * Tuning parameters 13 ****************************************/ 14 #ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ 15 # define BMK_TIMETEST_DEFAULT_S 3 16 #endif 17 18 /* ************************************* 19 * Includes 20 ***************************************/ 21 /* this must be included first */ 22 #include "platform.h" /* Large Files support, compiler specifics */ 23 24 /* then following system includes */ 25 #include <assert.h> /* assert */ 26 #include <errno.h> 27 #include <stdio.h> /* fprintf, fopen */ 28 #include <stdlib.h> /* malloc, free */ 29 #include <string.h> /* memset, strerror */ 30 #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ 31 #include "../lib/common/mem.h" 32 #include "benchfn.h" 33 #include "timefn.h" /* UTIL_time_t */ 34 #ifndef ZSTD_STATIC_LINKING_ONLY 35 # define ZSTD_STATIC_LINKING_ONLY 36 #endif 37 #include "../lib/zstd.h" 38 #include "datagen.h" /* RDG_genBuffer */ 39 #include "lorem.h" /* LOREM_genBuffer */ 40 #ifndef XXH_INLINE_ALL 41 # define XXH_INLINE_ALL 42 #endif 43 #include "../lib/common/xxhash.h" 44 #include "../lib/zstd_errors.h" 45 #include "benchzstd.h" 46 47 /* ************************************* 48 * Constants 49 ***************************************/ 50 #ifndef ZSTD_GIT_COMMIT 51 # define ZSTD_GIT_COMMIT_STRING "" 52 #else 53 # define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) 54 #endif 55 56 #define TIMELOOP_MICROSEC (1 * 1000000ULL) /* 1 second */ 57 #define TIMELOOP_NANOSEC (1 * 1000000000ULL) /* 1 second */ 58 #define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */ 59 #define COOLPERIOD_SEC 10 60 61 #define KB *(1 << 10) 62 #define MB *(1 << 20) 63 #define GB *(1U << 30) 64 65 #define BMK_RUNTEST_DEFAULT_MS 1000 66 67 static const size_t maxMemory = (sizeof(size_t) == 4) 68 ? 69 /* 32-bit */ (2 GB - 64 MB) 70 : 71 /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31)); 72 73 /* ************************************* 74 * console display 75 ***************************************/ 76 #define DISPLAY(...) \ 77 { \ 78 fprintf(stderr, __VA_ARGS__); \ 79 fflush(NULL); \ 80 } 81 #define DISPLAYLEVEL(l, ...) \ 82 if (displayLevel >= l) { \ 83 DISPLAY(__VA_ARGS__); \ 84 } 85 /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + 86 * progression; 4 : + information */ 87 #define OUTPUT(...) \ 88 { \ 89 fprintf(stdout, __VA_ARGS__); \ 90 fflush(NULL); \ 91 } 92 #define OUTPUTLEVEL(l, ...) \ 93 if (displayLevel >= l) { \ 94 OUTPUT(__VA_ARGS__); \ 95 } 96 97 /* ************************************* 98 * Exceptions 99 ***************************************/ 100 #ifndef DEBUG 101 # define DEBUG 0 102 #endif 103 #define DEBUGOUTPUT(...) \ 104 { \ 105 if (DEBUG) \ 106 DISPLAY(__VA_ARGS__); \ 107 } 108 109 #define RETURN_ERROR_INT(errorNum, ...) \ 110 { \ 111 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 112 DISPLAYLEVEL(1, "Error %i : ", errorNum); \ 113 DISPLAYLEVEL(1, __VA_ARGS__); \ 114 DISPLAYLEVEL(1, " \n"); \ 115 return errorNum; \ 116 } 117 118 #define CHECK_Z(zf) \ 119 { \ 120 size_t const zerr = zf; \ 121 if (ZSTD_isError(zerr)) { \ 122 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 123 DISPLAY("Error : "); \ 124 DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \ 125 DISPLAY(" \n"); \ 126 exit(1); \ 127 } \ 128 } 129 130 #define RETURN_ERROR(errorNum, retType, ...) \ 131 { \ 132 retType r; \ 133 memset(&r, 0, sizeof(retType)); \ 134 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 135 DISPLAYLEVEL(1, "Error %i : ", errorNum); \ 136 DISPLAYLEVEL(1, __VA_ARGS__); \ 137 DISPLAYLEVEL(1, " \n"); \ 138 r.tag = errorNum; \ 139 return r; \ 140 } 141 142 static size_t uintSize(unsigned value) 143 { 144 size_t size = 1; 145 while (value >= 10) { 146 size++; 147 value /= 10; 148 } 149 return size; 150 } 151 152 /* Note: presume @buffer is large enough */ 153 static void writeUint_varLen(char* buffer, size_t capacity, unsigned value) 154 { 155 int endPos = (int)uintSize(value) - 1; 156 assert(uintSize(value) >= 1); 157 assert(uintSize(value) < capacity); (void)capacity; 158 while (endPos >= 0) { 159 char c = '0' + (char)(value % 10); 160 buffer[endPos--] = c; 161 value /= 10; 162 } 163 } 164 165 /* replacement for snprintf(), which is not supported by C89. 166 * sprintf() would be the supported one, but it's labelled unsafe: 167 * modern static analyzer will flag sprintf() as dangerous, making it unusable. 168 * formatString_u() replaces snprintf() for the specific case where there is only one %u argument */ 169 static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value) 170 { 171 size_t const valueSize = uintSize(value); 172 size_t written = 0; 173 int i; 174 175 for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; i++) { 176 if (formatString[i] != '%') { 177 buffer[written++] = formatString[i]; 178 continue; 179 } 180 181 i++; 182 if (formatString[i] == 'u') { 183 if (written + valueSize >= buffer_size) abort(); /* buffer not large enough */ 184 writeUint_varLen(buffer + written, buffer_size - written, value); 185 written += valueSize; 186 } else if (formatString[i] == '%') { /* Check for escaped percent sign */ 187 buffer[written++] = '%'; 188 } else { 189 abort(); /* unsupported format */ 190 } 191 } 192 193 if (written < buffer_size) { 194 buffer[written] = '\0'; 195 } else { 196 abort(); /* buffer not large enough */ 197 } 198 199 return (int)written; 200 } 201 202 /* ************************************* 203 * Benchmark Parameters 204 ***************************************/ 205 206 BMK_advancedParams_t BMK_initAdvancedParams(void) 207 { 208 BMK_advancedParams_t const res = { 209 BMK_both, /* mode */ 210 BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ 211 0, /* blockSize */ 212 0, /* targetCBlockSize */ 213 0, /* nbWorkers */ 214 0, /* realTime */ 215 0, /* additionalParam */ 216 0, /* ldmFlag */ 217 0, /* ldmMinMatch */ 218 0, /* ldmHashLog */ 219 0, /* ldmBuckSizeLog */ 220 0, /* ldmHashRateLog */ 221 ZSTD_ps_auto, /* literalCompressionMode */ 222 0 /* useRowMatchFinder */ 223 }; 224 return res; 225 } 226 227 /* ******************************************************** 228 * Bench functions 229 **********************************************************/ 230 typedef struct { 231 const void* srcPtr; 232 size_t srcSize; 233 void* cPtr; 234 size_t cRoom; 235 size_t cSize; 236 void* resPtr; 237 size_t resSize; 238 } blockParam_t; 239 240 #undef MIN 241 #undef MAX 242 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 243 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 244 245 static void BMK_initCCtx( 246 ZSTD_CCtx* ctx, 247 const void* dictBuffer, 248 size_t dictBufferSize, 249 int cLevel, 250 const ZSTD_compressionParameters* comprParams, 251 const BMK_advancedParams_t* adv) 252 { 253 ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters); 254 if (adv->nbWorkers == 1) { 255 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0)); 256 } else { 257 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers)); 258 } 259 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel)); 260 CHECK_Z(ZSTD_CCtx_setParameter( 261 ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder)); 262 CHECK_Z(ZSTD_CCtx_setParameter( 263 ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag)); 264 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch)); 265 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog)); 266 CHECK_Z(ZSTD_CCtx_setParameter( 267 ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog)); 268 CHECK_Z(ZSTD_CCtx_setParameter( 269 ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog)); 270 CHECK_Z(ZSTD_CCtx_setParameter( 271 ctx, ZSTD_c_windowLog, (int)comprParams->windowLog)); 272 CHECK_Z(ZSTD_CCtx_setParameter( 273 ctx, ZSTD_c_hashLog, (int)comprParams->hashLog)); 274 CHECK_Z(ZSTD_CCtx_setParameter( 275 ctx, ZSTD_c_chainLog, (int)comprParams->chainLog)); 276 CHECK_Z(ZSTD_CCtx_setParameter( 277 ctx, ZSTD_c_searchLog, (int)comprParams->searchLog)); 278 CHECK_Z(ZSTD_CCtx_setParameter( 279 ctx, ZSTD_c_minMatch, (int)comprParams->minMatch)); 280 CHECK_Z(ZSTD_CCtx_setParameter( 281 ctx, ZSTD_c_targetLength, (int)comprParams->targetLength)); 282 CHECK_Z(ZSTD_CCtx_setParameter( 283 ctx, 284 ZSTD_c_literalCompressionMode, 285 (int)adv->literalCompressionMode)); 286 CHECK_Z(ZSTD_CCtx_setParameter( 287 ctx, ZSTD_c_strategy, (int)comprParams->strategy)); 288 CHECK_Z(ZSTD_CCtx_setParameter( 289 ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize)); 290 CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize)); 291 } 292 293 static void 294 BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) 295 { 296 CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); 297 CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize)); 298 } 299 300 typedef struct { 301 ZSTD_CCtx* cctx; 302 const void* dictBuffer; 303 size_t dictBufferSize; 304 int cLevel; 305 const ZSTD_compressionParameters* comprParams; 306 const BMK_advancedParams_t* adv; 307 } BMK_initCCtxArgs; 308 309 static size_t local_initCCtx(void* payload) 310 { 311 BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; 312 BMK_initCCtx( 313 ag->cctx, 314 ag->dictBuffer, 315 ag->dictBufferSize, 316 ag->cLevel, 317 ag->comprParams, 318 ag->adv); 319 return 0; 320 } 321 322 typedef struct { 323 ZSTD_DCtx* dctx; 324 const void* dictBuffer; 325 size_t dictBufferSize; 326 } BMK_initDCtxArgs; 327 328 static size_t local_initDCtx(void* payload) 329 { 330 BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; 331 BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); 332 return 0; 333 } 334 335 /* `addArgs` is the context */ 336 static size_t local_defaultCompress( 337 const void* srcBuffer, 338 size_t srcSize, 339 void* dstBuffer, 340 size_t dstSize, 341 void* addArgs) 342 { 343 ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; 344 return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); 345 } 346 347 /* `addArgs` is the context */ 348 static size_t local_defaultDecompress( 349 const void* srcBuffer, 350 size_t srcSize, 351 void* dstBuffer, 352 size_t dstCapacity, 353 void* addArgs) 354 { 355 size_t moreToFlush = 1; 356 ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; 357 ZSTD_inBuffer in; 358 ZSTD_outBuffer out; 359 in.src = srcBuffer; 360 in.size = srcSize; 361 in.pos = 0; 362 out.dst = dstBuffer; 363 out.size = dstCapacity; 364 out.pos = 0; 365 while (moreToFlush) { 366 if (out.pos == out.size) { 367 return (size_t)-ZSTD_error_dstSize_tooSmall; 368 } 369 moreToFlush = ZSTD_decompressStream(dctx, &out, &in); 370 if (ZSTD_isError(moreToFlush)) { 371 return moreToFlush; 372 } 373 } 374 return out.pos; 375 } 376 377 /* ================================================================= */ 378 /* Benchmark Zstandard, mem-to-mem scenarios */ 379 /* ================================================================= */ 380 381 int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) 382 { 383 return outcome.tag == 0; 384 } 385 386 BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) 387 { 388 assert(outcome.tag == 0); 389 return outcome.internal_never_use_directly; 390 } 391 392 static BMK_benchOutcome_t BMK_benchOutcome_error(void) 393 { 394 BMK_benchOutcome_t b; 395 memset(&b, 0, sizeof(b)); 396 b.tag = 1; 397 return b; 398 } 399 400 static BMK_benchOutcome_t BMK_benchOutcome_setValidResult( 401 BMK_benchResult_t result) 402 { 403 BMK_benchOutcome_t b; 404 b.tag = 0; 405 b.internal_never_use_directly = result; 406 return b; 407 } 408 409 /* benchMem with no allocation */ 410 static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( 411 const void** srcPtrs, 412 size_t* srcSizes, 413 void** cPtrs, 414 size_t* cCapacities, 415 size_t* cSizes, 416 void** resPtrs, 417 size_t* resSizes, 418 void** resultBufferPtr, 419 void* compressedBuffer, 420 size_t maxCompressedSize, 421 BMK_timedFnState_t* timeStateCompress, 422 BMK_timedFnState_t* timeStateDecompress, 423 424 const void* srcBuffer, 425 size_t srcSize, 426 const size_t* fileSizes, 427 unsigned nbFiles, 428 const int cLevel, 429 const ZSTD_compressionParameters* comprParams, 430 const void* dictBuffer, 431 size_t dictBufferSize, 432 ZSTD_CCtx* cctx, 433 ZSTD_DCtx* dctx, 434 int displayLevel, 435 const char* displayName, 436 const BMK_advancedParams_t* adv) 437 { 438 size_t const blockSize = 439 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly)) 440 ? adv->blockSize 441 : srcSize) 442 + (!srcSize); /* avoid div by 0 */ 443 BMK_benchResult_t benchResult; 444 size_t const loadedCompressedSize = srcSize; 445 size_t cSize = 0; 446 double ratio = 0.; 447 U32 nbBlocks; 448 449 assert(cctx != NULL); 450 assert(dctx != NULL); 451 452 /* init */ 453 memset(&benchResult, 0, sizeof(benchResult)); 454 if (strlen(displayName) > 17) 455 displayName += 456 strlen(displayName) - 17; /* display last 17 characters */ 457 if (adv->mode == BMK_decodeOnly) { 458 /* benchmark only decompression : source must be already compressed */ 459 const char* srcPtr = (const char*)srcBuffer; 460 U64 totalDSize64 = 0; 461 U32 fileNb; 462 for (fileNb = 0; fileNb < nbFiles; fileNb++) { 463 U64 const fSize64 = 464 ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); 465 if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) { 466 RETURN_ERROR( 467 32, 468 BMK_benchOutcome_t, 469 "Decompressed size cannot be determined: cannot benchmark"); 470 } 471 if (fSize64 == ZSTD_CONTENTSIZE_ERROR) { 472 RETURN_ERROR( 473 32, 474 BMK_benchOutcome_t, 475 "Error while trying to assess decompressed size: data may be invalid"); 476 } 477 totalDSize64 += fSize64; 478 srcPtr += fileSizes[fileNb]; 479 } 480 { 481 size_t const decodedSize = (size_t)totalDSize64; 482 assert((U64)decodedSize == totalDSize64); /* check overflow */ 483 free(*resultBufferPtr); 484 if (totalDSize64 > decodedSize) { /* size_t overflow */ 485 RETURN_ERROR( 486 32, 487 BMK_benchOutcome_t, 488 "decompressed size is too large for local system"); 489 } 490 *resultBufferPtr = malloc(decodedSize); 491 if (!(*resultBufferPtr)) { 492 RETURN_ERROR( 493 33, 494 BMK_benchOutcome_t, 495 "allocation error: not enough memory"); 496 } 497 cSize = srcSize; 498 srcSize = decodedSize; 499 ratio = (double)srcSize / (double)cSize; 500 } 501 } 502 503 /* Init data blocks */ 504 { 505 const char* srcPtr = (const char*)srcBuffer; 506 char* cPtr = (char*)compressedBuffer; 507 char* resPtr = (char*)(*resultBufferPtr); 508 U32 fileNb; 509 for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) { 510 size_t remaining = fileSizes[fileNb]; 511 U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) 512 ? 1 513 : (U32)((remaining + (blockSize - 1)) / blockSize); 514 U32 const blockEnd = nbBlocks + nbBlocksforThisFile; 515 for (; nbBlocks < blockEnd; nbBlocks++) { 516 size_t const thisBlockSize = MIN(remaining, blockSize); 517 srcPtrs[nbBlocks] = srcPtr; 518 srcSizes[nbBlocks] = thisBlockSize; 519 cPtrs[nbBlocks] = cPtr; 520 cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) 521 ? thisBlockSize 522 : ZSTD_compressBound(thisBlockSize); 523 resPtrs[nbBlocks] = resPtr; 524 resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) 525 ? (size_t)ZSTD_findDecompressedSize( 526 srcPtr, thisBlockSize) 527 : thisBlockSize; 528 srcPtr += thisBlockSize; 529 cPtr += cCapacities[nbBlocks]; 530 resPtr += thisBlockSize; 531 remaining -= thisBlockSize; 532 if (adv->mode == BMK_decodeOnly) { 533 cSizes[nbBlocks] = thisBlockSize; 534 benchResult.cSize = thisBlockSize; 535 } 536 } 537 } 538 } 539 540 /* warming up `compressedBuffer` */ 541 if (adv->mode == BMK_decodeOnly) { 542 memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); 543 } else { 544 RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); 545 } 546 547 if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) { 548 OUTPUTLEVEL( 549 2, 550 "Warning : time measurements may be incorrect in multithreading mode... \n") 551 } 552 553 /* Bench */ 554 { 555 U64 const crcOrig = (adv->mode == BMK_decodeOnly) 556 ? 0 557 : XXH64(srcBuffer, srcSize, 0); 558 #define NB_MARKS 4 559 const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; 560 U32 markNb = 0; 561 int compressionCompleted = (adv->mode == BMK_decodeOnly); 562 int decompressionCompleted = (adv->mode == BMK_compressOnly); 563 BMK_benchParams_t cbp, dbp; 564 BMK_initCCtxArgs cctxprep; 565 BMK_initDCtxArgs dctxprep; 566 567 cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */ 568 cbp.benchPayload = cctx; 569 cbp.initFn = local_initCCtx; /* BMK_initCCtx */ 570 cbp.initPayload = &cctxprep; 571 cbp.errorFn = ZSTD_isError; 572 cbp.blockCount = nbBlocks; 573 cbp.srcBuffers = srcPtrs; 574 cbp.srcSizes = srcSizes; 575 cbp.dstBuffers = cPtrs; 576 cbp.dstCapacities = cCapacities; 577 cbp.blockResults = cSizes; 578 579 cctxprep.cctx = cctx; 580 cctxprep.dictBuffer = dictBuffer; 581 cctxprep.dictBufferSize = dictBufferSize; 582 cctxprep.cLevel = cLevel; 583 cctxprep.comprParams = comprParams; 584 cctxprep.adv = adv; 585 586 dbp.benchFn = local_defaultDecompress; 587 dbp.benchPayload = dctx; 588 dbp.initFn = local_initDCtx; 589 dbp.initPayload = &dctxprep; 590 dbp.errorFn = ZSTD_isError; 591 dbp.blockCount = nbBlocks; 592 dbp.srcBuffers = (const void* const*)cPtrs; 593 dbp.srcSizes = cSizes; 594 dbp.dstBuffers = resPtrs; 595 dbp.dstCapacities = resSizes; 596 dbp.blockResults = NULL; 597 598 dctxprep.dctx = dctx; 599 dctxprep.dictBuffer = dictBuffer; 600 dctxprep.dictBufferSize = dictBufferSize; 601 602 OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */ 603 assert(srcSize < UINT_MAX); 604 OUTPUTLEVEL( 605 2, 606 "%2s-%-17.17s :%10u -> \r", 607 marks[markNb], 608 displayName, 609 (unsigned)srcSize); 610 611 while (!(compressionCompleted && decompressionCompleted)) { 612 if (!compressionCompleted) { 613 BMK_runOutcome_t const cOutcome = 614 BMK_benchTimedFn(timeStateCompress, cbp); 615 616 if (!BMK_isSuccessful_runOutcome(cOutcome)) { 617 RETURN_ERROR(30, BMK_benchOutcome_t, "compression error"); 618 } 619 620 { 621 BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); 622 cSize = cResult.sumOfReturn; 623 ratio = (double)srcSize / (double)cSize; 624 { 625 BMK_benchResult_t newResult; 626 newResult.cSpeed = 627 (U64)((double)srcSize * TIMELOOP_NANOSEC 628 / cResult.nanoSecPerRun); 629 benchResult.cSize = cSize; 630 if (newResult.cSpeed > benchResult.cSpeed) 631 benchResult.cSpeed = newResult.cSpeed; 632 } 633 } 634 635 { 636 int const ratioDigits = 1 + (ratio < 100.) + (ratio < 10.); 637 assert(cSize < UINT_MAX); 638 OUTPUTLEVEL( 639 2, 640 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r", 641 marks[markNb], 642 displayName, 643 (unsigned)srcSize, 644 (unsigned)cSize, 645 ratioDigits, 646 ratio, 647 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, 648 (double)benchResult.cSpeed / MB_UNIT); 649 } 650 compressionCompleted = 651 BMK_isCompleted_TimedFn(timeStateCompress); 652 } 653 654 if (!decompressionCompleted) { 655 BMK_runOutcome_t const dOutcome = 656 BMK_benchTimedFn(timeStateDecompress, dbp); 657 658 if (!BMK_isSuccessful_runOutcome(dOutcome)) { 659 RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error"); 660 } 661 662 { 663 BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); 664 U64 const newDSpeed = 665 (U64)((double)srcSize * TIMELOOP_NANOSEC 666 / dResult.nanoSecPerRun); 667 if (newDSpeed > benchResult.dSpeed) 668 benchResult.dSpeed = newDSpeed; 669 } 670 671 { 672 int const ratioDigits = 1 + (ratio < 100.) + (ratio < 10.); 673 OUTPUTLEVEL( 674 2, 675 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r", 676 marks[markNb], 677 displayName, 678 (unsigned)srcSize, 679 (unsigned)cSize, 680 ratioDigits, 681 ratio, 682 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, 683 (double)benchResult.cSpeed / MB_UNIT, 684 (double)benchResult.dSpeed / MB_UNIT); 685 } 686 decompressionCompleted = 687 BMK_isCompleted_TimedFn(timeStateDecompress); 688 } 689 markNb = (markNb + 1) % NB_MARKS; 690 } /* while (!(compressionCompleted && decompressionCompleted)) */ 691 692 /* CRC Checking */ 693 { 694 const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); 695 U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); 696 if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) { 697 size_t u; 698 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", 699 displayName, 700 (unsigned)crcOrig, 701 (unsigned)crcCheck); 702 for (u = 0; u < srcSize; u++) { 703 if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) { 704 unsigned segNb, bNb, pos; 705 size_t bacc = 0; 706 DISPLAY("Decoding error at pos %u ", (unsigned)u); 707 for (segNb = 0; segNb < nbBlocks; segNb++) { 708 if (bacc + srcSizes[segNb] > u) 709 break; 710 bacc += srcSizes[segNb]; 711 } 712 pos = (U32)(u - bacc); 713 bNb = pos / (128 KB); 714 DISPLAY("(sample %u, block %u, pos %u) \n", 715 segNb, 716 bNb, 717 pos); 718 { 719 size_t const lowest = (u > 5) ? 5 : u; 720 size_t n; 721 DISPLAY("origin: "); 722 for (n = lowest; n > 0; n--) 723 DISPLAY("%02X ", 724 ((const BYTE*)srcBuffer)[u - n]); 725 DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); 726 for (n = 1; n < 3; n++) 727 DISPLAY("%02X ", 728 ((const BYTE*)srcBuffer)[u + n]); 729 DISPLAY(" \n"); 730 DISPLAY("decode: "); 731 for (n = lowest; n > 0; n--) 732 DISPLAY("%02X ", resultBuffer[u - n]); 733 DISPLAY(" :%02X: ", resultBuffer[u]); 734 for (n = 1; n < 3; n++) 735 DISPLAY("%02X ", resultBuffer[u + n]); 736 DISPLAY(" \n"); 737 } 738 break; 739 } 740 if (u == srcSize - 1) { /* should never happen */ 741 DISPLAY("no difference detected\n"); 742 } 743 } /* for (u=0; u<srcSize; u++) */ 744 } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */ 745 } /* CRC Checking */ 746 747 if (displayLevel 748 == 1) { /* hidden display mode -q, used by python speed benchmark */ 749 double const cSpeed = (double)benchResult.cSpeed / MB_UNIT; 750 double const dSpeed = (double)benchResult.dSpeed / MB_UNIT; 751 if (adv->additionalParam) { 752 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", 753 cLevel, 754 (int)cSize, 755 ratio, 756 cSpeed, 757 dSpeed, 758 displayName, 759 adv->additionalParam); 760 } else { 761 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", 762 cLevel, 763 (int)cSize, 764 ratio, 765 cSpeed, 766 dSpeed, 767 displayName); 768 } 769 } 770 771 OUTPUTLEVEL(2, "%2i#\n", cLevel); 772 } /* Bench */ 773 774 benchResult.cMem = 775 (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); 776 return BMK_benchOutcome_setValidResult(benchResult); 777 } 778 779 BMK_benchOutcome_t BMK_benchMemAdvanced( 780 const void* srcBuffer, 781 size_t srcSize, 782 void* dstBuffer, 783 size_t dstCapacity, 784 const size_t* fileSizes, 785 unsigned nbFiles, 786 int cLevel, 787 const ZSTD_compressionParameters* comprParams, 788 const void* dictBuffer, 789 size_t dictBufferSize, 790 int displayLevel, 791 const char* displayName, 792 const BMK_advancedParams_t* adv) 793 794 { 795 int const dstParamsError = 796 !dstBuffer ^ !dstCapacity; /* must be both NULL or none */ 797 798 size_t const blockSize = 799 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly)) 800 ? adv->blockSize 801 : srcSize) 802 + (!srcSize) /* avoid div by 0 */; 803 U32 const maxNbBlocks = 804 (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles; 805 806 /* these are the blockTable parameters, just split up */ 807 const void** const srcPtrs = 808 (const void**)malloc(maxNbBlocks * sizeof(void*)); 809 size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 810 811 void** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); 812 size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 813 size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 814 815 void** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); 816 size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); 817 818 BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState( 819 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); 820 BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState( 821 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); 822 823 ZSTD_CCtx* const cctx = ZSTD_createCCtx(); 824 ZSTD_DCtx* const dctx = ZSTD_createDCtx(); 825 826 const size_t maxCompressedSize = dstCapacity 827 ? dstCapacity 828 : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); 829 830 void* const internalDstBuffer = 831 dstBuffer ? NULL : malloc(maxCompressedSize); 832 void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer; 833 834 BMK_benchOutcome_t outcome = 835 BMK_benchOutcome_error(); /* error by default */ 836 837 void* resultBuffer = srcSize ? malloc(srcSize) : NULL; 838 839 int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes 840 || !cCapacities || !resPtrs || !resSizes || !timeStateCompress 841 || !timeStateDecompress || !cctx || !dctx || !compressedBuffer 842 || !resultBuffer; 843 844 if (!allocationincomplete && !dstParamsError) { 845 outcome = BMK_benchMemAdvancedNoAlloc( 846 srcPtrs, 847 srcSizes, 848 cPtrs, 849 cCapacities, 850 cSizes, 851 resPtrs, 852 resSizes, 853 &resultBuffer, 854 compressedBuffer, 855 maxCompressedSize, 856 timeStateCompress, 857 timeStateDecompress, 858 srcBuffer, 859 srcSize, 860 fileSizes, 861 nbFiles, 862 cLevel, 863 comprParams, 864 dictBuffer, 865 dictBufferSize, 866 cctx, 867 dctx, 868 displayLevel, 869 displayName, 870 adv); 871 } 872 873 /* clean up */ 874 BMK_freeTimedFnState(timeStateCompress); 875 BMK_freeTimedFnState(timeStateDecompress); 876 877 ZSTD_freeCCtx(cctx); 878 ZSTD_freeDCtx(dctx); 879 880 free(internalDstBuffer); 881 free(resultBuffer); 882 883 free((void*)srcPtrs); 884 free(srcSizes); 885 free(cPtrs); 886 free(cSizes); 887 free(cCapacities); 888 free(resPtrs); 889 free(resSizes); 890 891 if (allocationincomplete) { 892 RETURN_ERROR( 893 31, BMK_benchOutcome_t, "allocation error : not enough memory"); 894 } 895 896 if (dstParamsError) { 897 RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent"); 898 } 899 return outcome; 900 } 901 902 BMK_benchOutcome_t BMK_benchMem( 903 const void* srcBuffer, 904 size_t srcSize, 905 const size_t* fileSizes, 906 unsigned nbFiles, 907 int cLevel, 908 const ZSTD_compressionParameters* comprParams, 909 const void* dictBuffer, 910 size_t dictBufferSize, 911 int displayLevel, 912 const char* displayName) 913 { 914 BMK_advancedParams_t const adv = BMK_initAdvancedParams(); 915 return BMK_benchMemAdvanced( 916 srcBuffer, 917 srcSize, 918 NULL, 919 0, 920 fileSizes, 921 nbFiles, 922 cLevel, 923 comprParams, 924 dictBuffer, 925 dictBufferSize, 926 displayLevel, 927 displayName, 928 &adv); 929 } 930 931 /* @return: 0 on success, !0 if error */ 932 static int BMK_benchCLevels( 933 const void* srcBuffer, 934 size_t benchedSize, 935 const size_t* fileSizes, 936 unsigned nbFiles, 937 int startCLevel, int endCLevel, 938 const ZSTD_compressionParameters* comprParams, 939 const void* dictBuffer, 940 size_t dictBufferSize, 941 int displayLevel, 942 const char* displayName, 943 BMK_advancedParams_t const* const adv) 944 { 945 int level; 946 const char* pch = strrchr(displayName, '\\'); /* Windows */ 947 if (!pch) 948 pch = strrchr(displayName, '/'); /* Linux */ 949 if (pch) 950 displayName = pch + 1; 951 952 if (endCLevel > ZSTD_maxCLevel()) { 953 DISPLAYLEVEL(1, "Invalid Compression Level \n"); 954 return 15; 955 } 956 if (endCLevel < startCLevel) { 957 DISPLAYLEVEL(1, "Invalid Compression Level Range \n"); 958 return 15; 959 } 960 961 if (adv->realTime) { 962 DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); 963 SET_REALTIME_PRIORITY; 964 } 965 966 if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */ 967 OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", 968 ZSTD_VERSION_STRING, 969 ZSTD_GIT_COMMIT_STRING, 970 (unsigned)benchedSize, 971 adv->nbSeconds, 972 (unsigned)(adv->blockSize >> 10)); 973 974 for (level = startCLevel; level <= endCLevel; level++) { 975 BMK_benchOutcome_t res = BMK_benchMemAdvanced( 976 srcBuffer, 977 benchedSize, 978 NULL, 979 0, 980 fileSizes, 981 nbFiles, 982 level, 983 comprParams, 984 dictBuffer, 985 dictBufferSize, 986 displayLevel, 987 displayName, 988 adv); 989 if (!BMK_isSuccessful_benchOutcome(res)) return 1; 990 } 991 return 0; 992 } 993 994 int BMK_syntheticTest( 995 double compressibility, 996 int startingCLevel, int endCLevel, 997 const ZSTD_compressionParameters* compressionParams, 998 int displayLevel, 999 const BMK_advancedParams_t* adv) 1000 { 1001 char nameBuff[20] = { 0 }; 1002 const char* name = nameBuff; 1003 size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000; 1004 1005 /* Memory allocation */ 1006 void* const srcBuffer = malloc(benchedSize); 1007 if (!srcBuffer) { 1008 DISPLAYLEVEL(1, "allocation error : not enough memory \n"); 1009 return 16; 1010 } 1011 1012 /* Fill input buffer */ 1013 if (compressibility < 0.0) { 1014 LOREM_genBuffer(srcBuffer, benchedSize, 0); 1015 name = "Lorem ipsum"; 1016 } else { 1017 RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); 1018 formatString_u( 1019 nameBuff, 1020 sizeof(nameBuff), 1021 "Synthetic %u%%", 1022 (unsigned)(compressibility * 100)); 1023 } 1024 1025 /* Bench */ 1026 { int res = BMK_benchCLevels( 1027 srcBuffer, 1028 benchedSize, 1029 &benchedSize, 1030 1, 1031 startingCLevel, endCLevel, 1032 compressionParams, 1033 NULL, 1034 0, /* dictionary */ 1035 displayLevel, 1036 name, 1037 adv); 1038 free(srcBuffer); 1039 return res; 1040 } 1041 } 1042 1043 static size_t BMK_findMaxMem(U64 requiredMem) 1044 { 1045 size_t const step = 64 MB; 1046 BYTE* testmem = NULL; 1047 1048 requiredMem = (((requiredMem >> 26) + 1) << 26); 1049 requiredMem += step; 1050 if (requiredMem > maxMemory) 1051 requiredMem = maxMemory; 1052 1053 do { 1054 testmem = (BYTE*)malloc((size_t)requiredMem); 1055 requiredMem -= step; 1056 } while (!testmem && requiredMem > 0); 1057 1058 free(testmem); 1059 return (size_t)(requiredMem); 1060 } 1061 1062 /*! BMK_loadFiles() : 1063 * Loads `buffer` with content of files listed within `fileNamesTable`. 1064 * At most, fills `buffer` entirely. */ 1065 static int BMK_loadFiles( 1066 void* buffer, 1067 size_t bufferSize, 1068 size_t* fileSizes, 1069 const char* const* fileNamesTable, 1070 unsigned nbFiles, 1071 int displayLevel) 1072 { 1073 size_t pos = 0, totalSize = 0; 1074 unsigned n; 1075 for (n = 0; n < nbFiles; n++) { 1076 const char* const filename = fileNamesTable[n]; 1077 U64 fileSize = UTIL_getFileSize( 1078 filename); /* last file may be shortened */ 1079 if (UTIL_isDirectory(filename)) { 1080 DISPLAYLEVEL( 1081 2, "Ignoring %s directory... \n", filename); 1082 fileSizes[n] = 0; 1083 continue; 1084 } 1085 if (fileSize == UTIL_FILESIZE_UNKNOWN) { 1086 DISPLAYLEVEL( 1087 2, 1088 "Cannot evaluate size of %s, ignoring ... \n", 1089 filename); 1090 fileSizes[n] = 0; 1091 continue; 1092 } 1093 if (fileSize > bufferSize - pos) { 1094 /* buffer too small - limit quantity loaded */ 1095 fileSize = bufferSize - pos; 1096 nbFiles = n; /* stop after this file */ 1097 } 1098 1099 { FILE* const f = fopen(filename, "rb"); 1100 if (f == NULL) { 1101 RETURN_ERROR_INT( 1102 10, "cannot open file %s", filename); 1103 } 1104 OUTPUTLEVEL(2, "Loading %s... \r", filename); 1105 { size_t const readSize = 1106 fread(((char*)buffer) + pos, 1, (size_t)fileSize, f); 1107 if (readSize != (size_t)fileSize) { 1108 fclose(f); 1109 RETURN_ERROR_INT( 1110 11, "invalid read %s", filename); 1111 } 1112 pos += readSize; 1113 } 1114 fileSizes[n] = (size_t)fileSize; 1115 totalSize += (size_t)fileSize; 1116 fclose(f); 1117 } 1118 } 1119 1120 if (totalSize == 0) 1121 RETURN_ERROR_INT(12, "no data to bench"); 1122 return 0; 1123 } 1124 1125 int BMK_benchFilesAdvanced( 1126 const char* const* fileNamesTable, 1127 unsigned nbFiles, 1128 const char* dictFileName, 1129 int startCLevel, int endCLevel, 1130 const ZSTD_compressionParameters* compressionParams, 1131 int displayLevel, 1132 const BMK_advancedParams_t* adv) 1133 { 1134 void* srcBuffer = NULL; 1135 size_t benchedSize; 1136 void* dictBuffer = NULL; 1137 size_t dictBufferSize = 0; 1138 size_t* fileSizes = NULL; 1139 int res = 1; 1140 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); 1141 1142 if (!nbFiles) { 1143 DISPLAYLEVEL(1, "No Files to Benchmark"); 1144 return 13; 1145 } 1146 1147 if (endCLevel > ZSTD_maxCLevel()) { 1148 DISPLAYLEVEL(1, "Invalid Compression Level"); 1149 return 14; 1150 } 1151 1152 if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) { 1153 DISPLAYLEVEL(1, "Error loading files"); 1154 return 15; 1155 } 1156 1157 fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); 1158 if (!fileSizes) { 1159 DISPLAYLEVEL(1, "not enough memory for fileSizes"); 1160 return 16; 1161 } 1162 1163 /* Load dictionary */ 1164 if (dictFileName != NULL) { 1165 U64 const dictFileSize = UTIL_getFileSize(dictFileName); 1166 if (dictFileSize == UTIL_FILESIZE_UNKNOWN) { 1167 DISPLAYLEVEL( 1168 1, 1169 "error loading %s : %s \n", 1170 dictFileName, 1171 strerror(errno)); 1172 free(fileSizes); 1173 DISPLAYLEVEL(1, "benchmark aborted"); 1174 return 17; 1175 } 1176 if (dictFileSize > 64 MB) { 1177 free(fileSizes); 1178 DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName); 1179 return 18; 1180 } 1181 dictBufferSize = (size_t)dictFileSize; 1182 dictBuffer = malloc(dictBufferSize); 1183 if (dictBuffer == NULL) { 1184 free(fileSizes); 1185 DISPLAYLEVEL( 1186 1, 1187 "not enough memory for dictionary (%u bytes)", 1188 (unsigned)dictBufferSize); 1189 return 19; 1190 } 1191 1192 { 1193 int const errorCode = BMK_loadFiles( 1194 dictBuffer, 1195 dictBufferSize, 1196 fileSizes, 1197 &dictFileName /*?*/, 1198 1 /*?*/, 1199 displayLevel); 1200 if (errorCode) { 1201 goto _cleanUp; 1202 } 1203 } 1204 } 1205 1206 /* Memory allocation & restrictions */ 1207 benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; 1208 if ((U64)benchedSize > totalSizeToLoad) 1209 benchedSize = (size_t)totalSizeToLoad; 1210 if (benchedSize < totalSizeToLoad) 1211 DISPLAY("Not enough memory; testing %u MB only...\n", 1212 (unsigned)(benchedSize >> 20)); 1213 1214 srcBuffer = benchedSize ? malloc(benchedSize) : NULL; 1215 if (!srcBuffer) { 1216 free(dictBuffer); 1217 free(fileSizes); 1218 DISPLAYLEVEL(1, "not enough memory for srcBuffer"); 1219 return 20; 1220 } 1221 1222 /* Load input buffer */ 1223 { 1224 int const errorCode = BMK_loadFiles( 1225 srcBuffer, 1226 benchedSize, 1227 fileSizes, 1228 fileNamesTable, 1229 nbFiles, 1230 displayLevel); 1231 if (errorCode) { 1232 goto _cleanUp; 1233 } 1234 } 1235 1236 /* Bench */ 1237 { 1238 char mfName[20] = { 0 }; 1239 formatString_u(mfName, sizeof(mfName), " %u files", nbFiles); 1240 { const char* const displayName = 1241 (nbFiles > 1) ? mfName : fileNamesTable[0]; 1242 res = BMK_benchCLevels( 1243 srcBuffer, 1244 benchedSize, 1245 fileSizes, 1246 nbFiles, 1247 startCLevel, endCLevel, 1248 compressionParams, 1249 dictBuffer, 1250 dictBufferSize, 1251 displayLevel, 1252 displayName, 1253 adv); 1254 } 1255 } 1256 1257 _cleanUp: 1258 free(srcBuffer); 1259 free(dictBuffer); 1260 free(fileSizes); 1261 return res; 1262 } 1263 1264 int BMK_benchFiles( 1265 const char* const* fileNamesTable, 1266 unsigned nbFiles, 1267 const char* dictFileName, 1268 int cLevel, 1269 const ZSTD_compressionParameters* compressionParams, 1270 int displayLevel) 1271 { 1272 BMK_advancedParams_t const adv = BMK_initAdvancedParams(); 1273 return BMK_benchFilesAdvanced( 1274 fileNamesTable, 1275 nbFiles, 1276 dictFileName, 1277 cLevel, cLevel, 1278 compressionParams, 1279 displayLevel, 1280 &adv); 1281 } 1282