1 /* crc32.c -- compute the CRC-32 of a data stream 2 * Copyright (C) 1995-2022 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 * 5 * This interleaved implementation of a CRC makes use of pipelined multiple 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8 */ 9 10 /* 11 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 12 protection on the static variables used to control the first-use generation 13 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 14 first call get_crc_table() to initialize the tables before allowing more than 15 one thread to use crc32(). 16 17 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 18 produced, so that this one source file can be compiled to an executable. 19 */ 20 21 #ifdef MAKECRCH 22 # include <stdio.h> 23 # ifndef DYNAMIC_CRC_TABLE 24 # define DYNAMIC_CRC_TABLE 25 # endif /* !DYNAMIC_CRC_TABLE */ 26 #endif /* MAKECRCH */ 27 28 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 29 30 /* 31 A CRC of a message is computed on N braids of words in the message, where 32 each word consists of W bytes (4 or 8). If N is 3, for example, then three 33 running sparse CRCs are calculated respectively on each braid, at these 34 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 35 This is done starting at a word boundary, and continues until as many blocks 36 of N * W bytes as are available have been processed. The results are combined 37 into a single CRC at the end. For this code, N must be in the range 1..6 and 38 W must be 4 or 8. The upper limit on N can be increased if desired by adding 39 more #if blocks, extending the patterns apparent in the code. In addition, 40 crc32.h would need to be regenerated, if the maximum N value is increased. 41 42 N and W are chosen empirically by benchmarking the execution time on a given 43 processor. The choices for N and W below were based on testing on Intel Kaby 44 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 45 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 46 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 47 They were all tested with either gcc or clang, all using the -O3 optimization 48 level. Your mileage may vary. 49 */ 50 51 /* Define N */ 52 #ifdef Z_TESTN 53 # define N Z_TESTN 54 #else 55 # define N 5 56 #endif 57 #if N < 1 || N > 6 58 # error N must be in 1..6 59 #endif 60 61 /* 62 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 63 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 64 that bytes are eight bits. 65 */ 66 67 /* 68 Define W and the associated z_word_t type. If W is not defined, then a 69 braided calculation is not used, and the associated tables and code are not 70 compiled. 71 */ 72 #ifdef Z_TESTW 73 # if Z_TESTW-1 != -1 74 # define W Z_TESTW 75 # endif 76 #else 77 # ifdef MAKECRCH 78 # define W 8 /* required for MAKECRCH */ 79 # else 80 # if defined(__x86_64__) || defined(__aarch64__) 81 # define W 8 82 # else 83 # define W 4 84 # endif 85 # endif 86 #endif 87 #ifdef W 88 # if W == 8 && defined(Z_U8) 89 typedef Z_U8 z_word_t; 90 # elif defined(Z_U4) 91 # undef W 92 # define W 4 93 typedef Z_U4 z_word_t; 94 # else 95 # undef W 96 # endif 97 #endif 98 99 /* Local functions. */ 100 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); 101 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); 102 103 /* If available, use the ARM processor CRC32 instruction. */ 104 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 105 # define ARMCRC32 106 #endif 107 108 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 109 /* 110 Swap the bytes in a z_word_t to convert between little and big endian. Any 111 self-respecting compiler will optimize this to a single machine byte-swap 112 instruction, if one is available. This assumes that word_t is either 32 bits 113 or 64 bits. 114 */ 115 local z_word_t byte_swap(z_word_t word) 116 { 117 # if W == 8 118 return 119 (word & 0xff00000000000000) >> 56 | 120 (word & 0xff000000000000) >> 40 | 121 (word & 0xff0000000000) >> 24 | 122 (word & 0xff00000000) >> 8 | 123 (word & 0xff000000) << 8 | 124 (word & 0xff0000) << 24 | 125 (word & 0xff00) << 40 | 126 (word & 0xff) << 56; 127 # else /* W == 4 */ 128 return 129 (word & 0xff000000) >> 24 | 130 (word & 0xff0000) >> 8 | 131 (word & 0xff00) << 8 | 132 (word & 0xff) << 24; 133 # endif 134 } 135 #endif 136 137 /* CRC polynomial. */ 138 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 139 140 #ifdef DYNAMIC_CRC_TABLE 141 142 local z_crc_t FAR crc_table[256]; 143 local z_crc_t FAR x2n_table[32]; 144 local void make_crc_table OF((void)); 145 #ifdef W 146 local z_word_t FAR crc_big_table[256]; 147 local z_crc_t FAR crc_braid_table[W][256]; 148 local z_word_t FAR crc_braid_big_table[W][256]; 149 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); 150 #endif 151 #ifdef MAKECRCH 152 local void write_table OF((FILE *, const z_crc_t FAR *, int)); 153 local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); 154 local void write_table64 OF((FILE *, const z_word_t FAR *, int)); 155 #endif /* MAKECRCH */ 156 157 /* 158 Define a once() function depending on the availability of atomics. If this is 159 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 160 multiple threads, and if atomics are not available, then get_crc_table() must 161 be called to initialize the tables and must return before any threads are 162 allowed to compute or combine CRCs. 163 */ 164 165 /* Definition of once functionality. */ 166 typedef struct once_s once_t; 167 local void once OF((once_t *, void (*)(void))); 168 169 /* Check for the availability of atomics. */ 170 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 171 !defined(__STDC_NO_ATOMICS__) 172 173 #include <stdatomic.h> 174 175 /* Structure for once(), which must be initialized with ONCE_INIT. */ 176 struct once_s { 177 atomic_flag begun; 178 atomic_int done; 179 }; 180 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 181 182 /* 183 Run the provided init() function exactly once, even if multiple threads 184 invoke once() at the same time. The state must be a once_t initialized with 185 ONCE_INIT. 186 */ 187 local void once(state, init) 188 once_t *state; 189 void (*init)(void); 190 { 191 if (!atomic_load(&state->done)) { 192 if (atomic_flag_test_and_set(&state->begun)) 193 while (!atomic_load(&state->done)) 194 ; 195 else { 196 init(); 197 atomic_store(&state->done, 1); 198 } 199 } 200 } 201 202 #else /* no atomics */ 203 204 /* Structure for once(), which must be initialized with ONCE_INIT. */ 205 struct once_s { 206 volatile int begun; 207 volatile int done; 208 }; 209 #define ONCE_INIT {0, 0} 210 211 /* Test and set. Alas, not atomic, but tries to minimize the period of 212 vulnerability. */ 213 local int test_and_set OF((int volatile *)); 214 local int test_and_set(flag) 215 int volatile *flag; 216 { 217 int was; 218 219 was = *flag; 220 *flag = 1; 221 return was; 222 } 223 224 /* Run the provided init() function once. This is not thread-safe. */ 225 local void once(state, init) 226 once_t *state; 227 void (*init)(void); 228 { 229 if (!state->done) { 230 if (test_and_set(&state->begun)) 231 while (!state->done) 232 ; 233 else { 234 init(); 235 state->done = 1; 236 } 237 } 238 } 239 240 #endif 241 242 /* State for once(). */ 243 local once_t made = ONCE_INIT; 244 245 /* 246 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 247 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 248 249 Polynomials over GF(2) are represented in binary, one bit per coefficient, 250 with the lowest powers in the most significant bit. Then adding polynomials 251 is just exclusive-or, and multiplying a polynomial by x is a right shift by 252 one. If we call the above polynomial p, and represent a byte as the 253 polynomial q, also with the lowest power in the most significant bit (so the 254 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 255 where a mod b means the remainder after dividing a by b. 256 257 This calculation is done using the shift-register method of multiplying and 258 taking the remainder. The register is initialized to zero, and for each 259 incoming bit, x^32 is added mod p to the register if the bit is a one (where 260 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 261 (which is shifting right by one and adding x^32 mod p if the bit shifted out 262 is a one). We start with the highest power (least significant bit) of q and 263 repeat for all eight bits of q. 264 265 The table is simply the CRC of all possible eight bit values. This is all the 266 information needed to generate CRCs on data a byte at a time for all 267 combinations of CRC register values and incoming bytes. 268 */ 269 270 local void make_crc_table() 271 { 272 unsigned i, j, n; 273 z_crc_t p; 274 275 /* initialize the CRC of bytes tables */ 276 for (i = 0; i < 256; i++) { 277 p = i; 278 for (j = 0; j < 8; j++) 279 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 280 crc_table[i] = p; 281 #ifdef W 282 crc_big_table[i] = byte_swap(p); 283 #endif 284 } 285 286 /* initialize the x^2^n mod p(x) table */ 287 p = (z_crc_t)1 << 30; /* x^1 */ 288 x2n_table[0] = p; 289 for (n = 1; n < 32; n++) 290 x2n_table[n] = p = multmodp(p, p); 291 292 #ifdef W 293 /* initialize the braiding tables -- needs x2n_table[] */ 294 braid(crc_braid_table, crc_braid_big_table, N, W); 295 #endif 296 297 #ifdef MAKECRCH 298 { 299 /* 300 The crc32.h header file contains tables for both 32-bit and 64-bit 301 z_word_t's, and so requires a 64-bit type be available. In that case, 302 z_word_t must be defined to be 64-bits. This code then also generates 303 and writes out the tables for the case that z_word_t is 32 bits. 304 */ 305 #if !defined(W) || W != 8 306 # error Need a 64-bit integer type in order to generate crc32.h. 307 #endif 308 FILE *out; 309 int k, n; 310 z_crc_t ltl[8][256]; 311 z_word_t big[8][256]; 312 313 out = fopen("crc32.h", "w"); 314 if (out == NULL) return; 315 316 /* write out little-endian CRC table to crc32.h */ 317 fprintf(out, 318 "/* crc32.h -- tables for rapid CRC calculation\n" 319 " * Generated automatically by crc32.c\n */\n" 320 "\n" 321 "local const z_crc_t FAR crc_table[] = {\n" 322 " "); 323 write_table(out, crc_table, 256); 324 fprintf(out, 325 "};\n"); 326 327 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 328 fprintf(out, 329 "\n" 330 "#ifdef W\n" 331 "\n" 332 "#if W == 8\n" 333 "\n" 334 "local const z_word_t FAR crc_big_table[] = {\n" 335 " "); 336 write_table64(out, crc_big_table, 256); 337 fprintf(out, 338 "};\n"); 339 340 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 341 fprintf(out, 342 "\n" 343 "#else /* W == 4 */\n" 344 "\n" 345 "local const z_word_t FAR crc_big_table[] = {\n" 346 " "); 347 write_table32hi(out, crc_big_table, 256); 348 fprintf(out, 349 "};\n" 350 "\n" 351 "#endif\n"); 352 353 /* write out braid tables for each value of N */ 354 for (n = 1; n <= 6; n++) { 355 fprintf(out, 356 "\n" 357 "#if N == %d\n", n); 358 359 /* compute braid tables for this N and 64-bit word_t */ 360 braid(ltl, big, n, 8); 361 362 /* write out braid tables for 64-bit z_word_t to crc32.h */ 363 fprintf(out, 364 "\n" 365 "#if W == 8\n" 366 "\n" 367 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 368 for (k = 0; k < 8; k++) { 369 fprintf(out, " {"); 370 write_table(out, ltl[k], 256); 371 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 372 } 373 fprintf(out, 374 "};\n" 375 "\n" 376 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 377 for (k = 0; k < 8; k++) { 378 fprintf(out, " {"); 379 write_table64(out, big[k], 256); 380 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 381 } 382 fprintf(out, 383 "};\n"); 384 385 /* compute braid tables for this N and 32-bit word_t */ 386 braid(ltl, big, n, 4); 387 388 /* write out braid tables for 32-bit z_word_t to crc32.h */ 389 fprintf(out, 390 "\n" 391 "#else /* W == 4 */\n" 392 "\n" 393 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 394 for (k = 0; k < 4; k++) { 395 fprintf(out, " {"); 396 write_table(out, ltl[k], 256); 397 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 398 } 399 fprintf(out, 400 "};\n" 401 "\n" 402 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 403 for (k = 0; k < 4; k++) { 404 fprintf(out, " {"); 405 write_table32hi(out, big[k], 256); 406 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 407 } 408 fprintf(out, 409 "};\n" 410 "\n" 411 "#endif\n" 412 "\n" 413 "#endif\n"); 414 } 415 fprintf(out, 416 "\n" 417 "#endif\n"); 418 419 /* write out zeros operator table to crc32.h */ 420 fprintf(out, 421 "\n" 422 "local const z_crc_t FAR x2n_table[] = {\n" 423 " "); 424 write_table(out, x2n_table, 32); 425 fprintf(out, 426 "};\n"); 427 fclose(out); 428 } 429 #endif /* MAKECRCH */ 430 } 431 432 #ifdef MAKECRCH 433 434 /* 435 Write the 32-bit values in table[0..k-1] to out, five per line in 436 hexadecimal separated by commas. 437 */ 438 local void write_table(out, table, k) 439 FILE *out; 440 const z_crc_t FAR *table; 441 int k; 442 { 443 int n; 444 445 for (n = 0; n < k; n++) 446 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 447 (unsigned long)(table[n]), 448 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 449 } 450 451 /* 452 Write the high 32-bits of each value in table[0..k-1] to out, five per line 453 in hexadecimal separated by commas. 454 */ 455 local void write_table32hi(out, table, k) 456 FILE *out; 457 const z_word_t FAR *table; 458 int k; 459 { 460 int n; 461 462 for (n = 0; n < k; n++) 463 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 464 (unsigned long)(table[n] >> 32), 465 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 466 } 467 468 /* 469 Write the 64-bit values in table[0..k-1] to out, three per line in 470 hexadecimal separated by commas. This assumes that if there is a 64-bit 471 type, then there is also a long long integer type, and it is at least 64 472 bits. If not, then the type cast and format string can be adjusted 473 accordingly. 474 */ 475 local void write_table64(out, table, k) 476 FILE *out; 477 const z_word_t FAR *table; 478 int k; 479 { 480 int n; 481 482 for (n = 0; n < k; n++) 483 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 484 (unsigned long long)(table[n]), 485 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 486 } 487 488 /* Actually do the deed. */ 489 int main() 490 { 491 make_crc_table(); 492 return 0; 493 } 494 495 #endif /* MAKECRCH */ 496 497 #ifdef W 498 /* 499 Generate the little and big-endian braid tables for the given n and z_word_t 500 size w. Each array must have room for w blocks of 256 elements. 501 */ 502 local void braid(ltl, big, n, w) 503 z_crc_t ltl[][256]; 504 z_word_t big[][256]; 505 int n; 506 int w; 507 { 508 int k; 509 z_crc_t i, p, q; 510 for (k = 0; k < w; k++) { 511 p = x2nmodp((n * w + 3 - k) << 3, 0); 512 ltl[k][0] = 0; 513 big[w - 1 - k][0] = 0; 514 for (i = 1; i < 256; i++) { 515 ltl[k][i] = q = multmodp(i << 24, p); 516 big[w - 1 - k][i] = byte_swap(q); 517 } 518 } 519 } 520 #endif 521 522 #else /* !DYNAMIC_CRC_TABLE */ 523 /* ======================================================================== 524 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 525 * of x for combining CRC-32s, all made by make_crc_table(). 526 */ 527 #include "crc32.h" 528 #endif /* DYNAMIC_CRC_TABLE */ 529 530 /* ======================================================================== 531 * Routines used for CRC calculation. Some are also required for the table 532 * generation above. 533 */ 534 535 /* 536 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 537 reflected. For speed, this requires that a not be zero. 538 */ 539 local z_crc_t multmodp(z_crc_t a, z_crc_t b) 540 { 541 z_crc_t m, p; 542 543 m = (z_crc_t)1 << 31; 544 p = 0; 545 for (;;) { 546 if (a & m) { 547 p ^= b; 548 if ((a & (m - 1)) == 0) 549 break; 550 } 551 m >>= 1; 552 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 553 } 554 return p; 555 } 556 557 /* 558 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 559 initialized. 560 */ 561 local z_crc_t x2nmodp(z_off64_t n, unsigned k) 562 { 563 z_crc_t p; 564 565 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 566 while (n) { 567 if (n & 1) 568 p = multmodp(x2n_table[k & 31], p); 569 n >>= 1; 570 k++; 571 } 572 return p; 573 } 574 575 /* ========================================================================= 576 * This function can be used by asm versions of crc32(), and to force the 577 * generation of the CRC tables in a threaded application. 578 */ 579 const z_crc_t FAR * ZEXPORT get_crc_table(void) 580 { 581 #ifdef DYNAMIC_CRC_TABLE 582 once(&made, make_crc_table); 583 #endif /* DYNAMIC_CRC_TABLE */ 584 return (const z_crc_t FAR *)crc_table; 585 } 586 587 /* ========================================================================= 588 * Use ARM machine instructions if available. This will compute the CRC about 589 * ten times faster than the braided calculation. This code does not check for 590 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 591 * only be defined if the compilation specifies an ARM processor architecture 592 * that has the instructions. For example, compiling with -march=armv8.1-a or 593 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 594 * instructions. 595 */ 596 #ifdef ARMCRC32 597 598 /* 599 Constants empirically determined to maximize speed. These values are from 600 measurements on a Cortex-A57. Your mileage may vary. 601 */ 602 #define Z_BATCH 3990 /* number of words in a batch */ 603 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 604 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 605 606 unsigned long ZEXPORT crc32_z(crc, buf, len) 607 unsigned long crc; 608 const unsigned char FAR *buf; 609 z_size_t len; 610 { 611 z_crc_t val; 612 z_word_t crc1, crc2; 613 const z_word_t *word; 614 z_word_t val0, val1, val2; 615 z_size_t last, last2, i; 616 z_size_t num; 617 618 /* Return initial CRC, if requested. */ 619 if (buf == Z_NULL) return 0; 620 621 #ifdef DYNAMIC_CRC_TABLE 622 once(&made, make_crc_table); 623 #endif /* DYNAMIC_CRC_TABLE */ 624 625 /* Pre-condition the CRC */ 626 crc = (~crc) & 0xffffffff; 627 628 /* Compute the CRC up to a word boundary. */ 629 while (len && ((z_size_t)buf & 7) != 0) { 630 len--; 631 val = *buf++; 632 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 633 } 634 635 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 636 word = (z_word_t const *)buf; 637 num = len >> 3; 638 len &= 7; 639 640 /* Do three interleaved CRCs to realize the throughput of one crc32x 641 instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three 642 CRCs are combined into a single CRC after each set of batches. */ 643 while (num >= 3 * Z_BATCH) { 644 crc1 = 0; 645 crc2 = 0; 646 for (i = 0; i < Z_BATCH; i++) { 647 val0 = word[i]; 648 val1 = word[i + Z_BATCH]; 649 val2 = word[i + 2 * Z_BATCH]; 650 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 651 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 652 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 653 } 654 word += 3 * Z_BATCH; 655 num -= 3 * Z_BATCH; 656 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 657 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 658 } 659 660 /* Do one last smaller batch with the remaining words, if there are enough 661 to pay for the combination of CRCs. */ 662 last = num / 3; 663 if (last >= Z_BATCH_MIN) { 664 last2 = last << 1; 665 crc1 = 0; 666 crc2 = 0; 667 for (i = 0; i < last; i++) { 668 val0 = word[i]; 669 val1 = word[i + last]; 670 val2 = word[i + last2]; 671 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 672 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 673 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 674 } 675 word += 3 * last; 676 num -= 3 * last; 677 val = x2nmodp(last, 6); 678 crc = multmodp(val, crc) ^ crc1; 679 crc = multmodp(val, crc) ^ crc2; 680 } 681 682 /* Compute the CRC on any remaining words. */ 683 for (i = 0; i < num; i++) { 684 val0 = word[i]; 685 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 686 } 687 word += num; 688 689 /* Complete the CRC on any remaining bytes. */ 690 buf = (const unsigned char FAR *)word; 691 while (len) { 692 len--; 693 val = *buf++; 694 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 695 } 696 697 /* Return the CRC, post-conditioned. */ 698 return crc ^ 0xffffffff; 699 } 700 701 #else 702 703 #ifdef W 704 705 /* 706 Return the CRC of the W bytes in the word_t data, taking the 707 least-significant byte of the word as the first byte of data, without any pre 708 or post conditioning. This is used to combine the CRCs of each braid. 709 */ 710 local z_crc_t crc_word(z_word_t data) 711 { 712 int k; 713 for (k = 0; k < W; k++) 714 data = (data >> 8) ^ crc_table[data & 0xff]; 715 return (z_crc_t)data; 716 } 717 718 local z_word_t crc_word_big(z_word_t data) 719 { 720 int k; 721 for (k = 0; k < W; k++) 722 data = (data << 8) ^ 723 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 724 return data; 725 } 726 727 #endif 728 729 /* ========================================================================= */ 730 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 731 z_size_t len) 732 { 733 /* Return initial CRC, if requested. */ 734 if (buf == Z_NULL) return 0; 735 736 #ifdef DYNAMIC_CRC_TABLE 737 once(&made, make_crc_table); 738 #endif /* DYNAMIC_CRC_TABLE */ 739 740 /* Pre-condition the CRC */ 741 crc = (~crc) & 0xffffffff; 742 743 #ifdef W 744 745 /* If provided enough bytes, do a braided CRC calculation. */ 746 if (len >= N * W + W - 1) { 747 z_size_t blks; 748 z_word_t const *words; 749 unsigned endian; 750 int k; 751 752 /* Compute the CRC up to a z_word_t boundary. */ 753 while (len && ((z_size_t)buf & (W - 1)) != 0) { 754 len--; 755 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 756 } 757 758 /* Compute the CRC on as many N z_word_t blocks as are available. */ 759 blks = len / (N * W); 760 len -= blks * N * W; 761 words = (z_word_t const *)buf; 762 763 /* Do endian check at execution time instead of compile time, since ARM 764 processors can change the endianess at execution time. If the 765 compiler knows what the endianess will be, it can optimize out the 766 check and the unused branch. */ 767 endian = 1; 768 if (*(unsigned char *)&endian) { 769 /* Little endian. */ 770 771 z_crc_t crc0; 772 z_word_t word0; 773 #if N > 1 774 z_crc_t crc1; 775 z_word_t word1; 776 #if N > 2 777 z_crc_t crc2; 778 z_word_t word2; 779 #if N > 3 780 z_crc_t crc3; 781 z_word_t word3; 782 #if N > 4 783 z_crc_t crc4; 784 z_word_t word4; 785 #if N > 5 786 z_crc_t crc5; 787 z_word_t word5; 788 #endif 789 #endif 790 #endif 791 #endif 792 #endif 793 794 /* Initialize the CRC for each braid. */ 795 crc0 = crc; 796 #if N > 1 797 crc1 = 0; 798 #if N > 2 799 crc2 = 0; 800 #if N > 3 801 crc3 = 0; 802 #if N > 4 803 crc4 = 0; 804 #if N > 5 805 crc5 = 0; 806 #endif 807 #endif 808 #endif 809 #endif 810 #endif 811 812 /* 813 Process the first blks-1 blocks, computing the CRCs on each braid 814 independently. 815 */ 816 while (--blks) { 817 /* Load the word for each braid into registers. */ 818 word0 = crc0 ^ words[0]; 819 #if N > 1 820 word1 = crc1 ^ words[1]; 821 #if N > 2 822 word2 = crc2 ^ words[2]; 823 #if N > 3 824 word3 = crc3 ^ words[3]; 825 #if N > 4 826 word4 = crc4 ^ words[4]; 827 #if N > 5 828 word5 = crc5 ^ words[5]; 829 #endif 830 #endif 831 #endif 832 #endif 833 #endif 834 words += N; 835 836 /* Compute and update the CRC for each word. The loop should 837 get unrolled. */ 838 crc0 = crc_braid_table[0][word0 & 0xff]; 839 #if N > 1 840 crc1 = crc_braid_table[0][word1 & 0xff]; 841 #if N > 2 842 crc2 = crc_braid_table[0][word2 & 0xff]; 843 #if N > 3 844 crc3 = crc_braid_table[0][word3 & 0xff]; 845 #if N > 4 846 crc4 = crc_braid_table[0][word4 & 0xff]; 847 #if N > 5 848 crc5 = crc_braid_table[0][word5 & 0xff]; 849 #endif 850 #endif 851 #endif 852 #endif 853 #endif 854 for (k = 1; k < W; k++) { 855 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 856 #if N > 1 857 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 858 #if N > 2 859 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 860 #if N > 3 861 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 862 #if N > 4 863 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 864 #if N > 5 865 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 866 #endif 867 #endif 868 #endif 869 #endif 870 #endif 871 } 872 } 873 874 /* 875 Process the last block, combining the CRCs of the N braids at the 876 same time. 877 */ 878 crc = crc_word(crc0 ^ words[0]); 879 #if N > 1 880 crc = crc_word(crc1 ^ words[1] ^ crc); 881 #if N > 2 882 crc = crc_word(crc2 ^ words[2] ^ crc); 883 #if N > 3 884 crc = crc_word(crc3 ^ words[3] ^ crc); 885 #if N > 4 886 crc = crc_word(crc4 ^ words[4] ^ crc); 887 #if N > 5 888 crc = crc_word(crc5 ^ words[5] ^ crc); 889 #endif 890 #endif 891 #endif 892 #endif 893 #endif 894 words += N; 895 } 896 else { 897 /* Big endian. */ 898 899 z_word_t crc0, word0, comb; 900 #if N > 1 901 z_word_t crc1, word1; 902 #if N > 2 903 z_word_t crc2, word2; 904 #if N > 3 905 z_word_t crc3, word3; 906 #if N > 4 907 z_word_t crc4, word4; 908 #if N > 5 909 z_word_t crc5, word5; 910 #endif 911 #endif 912 #endif 913 #endif 914 #endif 915 916 /* Initialize the CRC for each braid. */ 917 crc0 = byte_swap(crc); 918 #if N > 1 919 crc1 = 0; 920 #if N > 2 921 crc2 = 0; 922 #if N > 3 923 crc3 = 0; 924 #if N > 4 925 crc4 = 0; 926 #if N > 5 927 crc5 = 0; 928 #endif 929 #endif 930 #endif 931 #endif 932 #endif 933 934 /* 935 Process the first blks-1 blocks, computing the CRCs on each braid 936 independently. 937 */ 938 while (--blks) { 939 /* Load the word for each braid into registers. */ 940 word0 = crc0 ^ words[0]; 941 #if N > 1 942 word1 = crc1 ^ words[1]; 943 #if N > 2 944 word2 = crc2 ^ words[2]; 945 #if N > 3 946 word3 = crc3 ^ words[3]; 947 #if N > 4 948 word4 = crc4 ^ words[4]; 949 #if N > 5 950 word5 = crc5 ^ words[5]; 951 #endif 952 #endif 953 #endif 954 #endif 955 #endif 956 words += N; 957 958 /* Compute and update the CRC for each word. The loop should 959 get unrolled. */ 960 crc0 = crc_braid_big_table[0][word0 & 0xff]; 961 #if N > 1 962 crc1 = crc_braid_big_table[0][word1 & 0xff]; 963 #if N > 2 964 crc2 = crc_braid_big_table[0][word2 & 0xff]; 965 #if N > 3 966 crc3 = crc_braid_big_table[0][word3 & 0xff]; 967 #if N > 4 968 crc4 = crc_braid_big_table[0][word4 & 0xff]; 969 #if N > 5 970 crc5 = crc_braid_big_table[0][word5 & 0xff]; 971 #endif 972 #endif 973 #endif 974 #endif 975 #endif 976 for (k = 1; k < W; k++) { 977 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 978 #if N > 1 979 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 980 #if N > 2 981 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 982 #if N > 3 983 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 984 #if N > 4 985 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 986 #if N > 5 987 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 988 #endif 989 #endif 990 #endif 991 #endif 992 #endif 993 } 994 } 995 996 /* 997 Process the last block, combining the CRCs of the N braids at the 998 same time. 999 */ 1000 comb = crc_word_big(crc0 ^ words[0]); 1001 #if N > 1 1002 comb = crc_word_big(crc1 ^ words[1] ^ comb); 1003 #if N > 2 1004 comb = crc_word_big(crc2 ^ words[2] ^ comb); 1005 #if N > 3 1006 comb = crc_word_big(crc3 ^ words[3] ^ comb); 1007 #if N > 4 1008 comb = crc_word_big(crc4 ^ words[4] ^ comb); 1009 #if N > 5 1010 comb = crc_word_big(crc5 ^ words[5] ^ comb); 1011 #endif 1012 #endif 1013 #endif 1014 #endif 1015 #endif 1016 words += N; 1017 crc = byte_swap(comb); 1018 } 1019 1020 /* 1021 Update the pointer to the remaining bytes to process. 1022 */ 1023 buf = (unsigned char const *)words; 1024 } 1025 1026 #endif /* W */ 1027 1028 /* Complete the computation of the CRC on any remaining bytes. */ 1029 while (len >= 8) { 1030 len -= 8; 1031 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1032 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1033 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1034 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1035 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1036 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1037 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1038 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1039 } 1040 while (len) { 1041 len--; 1042 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1043 } 1044 1045 /* Return the CRC, post-conditioned. */ 1046 return crc ^ 0xffffffff; 1047 } 1048 1049 #endif 1050 1051 /* ========================================================================= */ 1052 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, 1053 uInt len) 1054 { 1055 return crc32_z(crc, buf, len); 1056 } 1057 1058 /* ========================================================================= */ 1059 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) 1060 { 1061 #ifdef DYNAMIC_CRC_TABLE 1062 once(&made, make_crc_table); 1063 #endif /* DYNAMIC_CRC_TABLE */ 1064 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1065 } 1066 1067 /* ========================================================================= */ 1068 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) 1069 { 1070 return crc32_combine64(crc1, crc2, len2); 1071 } 1072 1073 /* ========================================================================= */ 1074 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) 1075 { 1076 #ifdef DYNAMIC_CRC_TABLE 1077 once(&made, make_crc_table); 1078 #endif /* DYNAMIC_CRC_TABLE */ 1079 return x2nmodp(len2, 3); 1080 } 1081 1082 /* ========================================================================= */ 1083 uLong ZEXPORT crc32_combine_gen(z_off_t len2) 1084 { 1085 return crc32_combine_gen64(len2); 1086 } 1087 1088 /* ========================================================================= */ 1089 uLong crc32_combine_op(uLong crc1, uLong crc2, uLong op) 1090 { 1091 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1092 } 1093