1 /* crc32.c -- compute the CRC-32 of a data stream 2 * Copyright (C) 1995-2022 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 * 5 * This interleaved implementation of a CRC makes use of pipelined multiple 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8 */ 9 10 /* @(#) $Id$ */ 11 12 /* 13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 14 protection on the static variables used to control the first-use generation 15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 16 first call get_crc_table() to initialize the tables before allowing more than 17 one thread to use crc32(). 18 19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 20 produced, so that this one source file can be compiled to an executable. 21 */ 22 23 #ifdef MAKECRCH 24 # include <stdio.h> 25 # ifndef DYNAMIC_CRC_TABLE 26 # define DYNAMIC_CRC_TABLE 27 # endif /* !DYNAMIC_CRC_TABLE */ 28 #endif /* MAKECRCH */ 29 30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 31 32 /* 33 A CRC of a message is computed on N braids of words in the message, where 34 each word consists of W bytes (4 or 8). If N is 3, for example, then three 35 running sparse CRCs are calculated respectively on each braid, at these 36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 37 This is done starting at a word boundary, and continues until as many blocks 38 of N * W bytes as are available have been processed. The results are combined 39 into a single CRC at the end. For this code, N must be in the range 1..6 and 40 W must be 4 or 8. The upper limit on N can be increased if desired by adding 41 more #if blocks, extending the patterns apparent in the code. In addition, 42 crc32.h would need to be regenerated, if the maximum N value is increased. 43 44 N and W are chosen empirically by benchmarking the execution time on a given 45 processor. The choices for N and W below were based on testing on Intel Kaby 46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 49 They were all tested with either gcc or clang, all using the -O3 optimization 50 level. Your mileage may vary. 51 */ 52 53 /* Define N */ 54 #ifdef Z_TESTN 55 # define N Z_TESTN 56 #else 57 # define N 5 58 #endif 59 #if N < 1 || N > 6 60 # error N must be in 1..6 61 #endif 62 63 /* 64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 66 that bytes are eight bits. 67 */ 68 69 /* 70 Define W and the associated z_word_t type. If W is not defined, then a 71 braided calculation is not used, and the associated tables and code are not 72 compiled. 73 */ 74 #ifdef Z_TESTW 75 # if Z_TESTW-1 != -1 76 # define W Z_TESTW 77 # endif 78 #else 79 # ifdef MAKECRCH 80 # define W 8 /* required for MAKECRCH */ 81 # else 82 # if defined(__x86_64__) || defined(__aarch64__) 83 # define W 8 84 # else 85 # define W 4 86 # endif 87 # endif 88 #endif 89 #ifdef W 90 # if W == 8 && defined(Z_U8) 91 typedef Z_U8 z_word_t; 92 # elif defined(Z_U4) 93 # undef W 94 # define W 4 95 typedef Z_U4 z_word_t; 96 # else 97 # undef W 98 # endif 99 #endif 100 101 /* Local functions. */ 102 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); 103 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); 104 105 /* If available, use the ARM processor CRC32 instruction. */ 106 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 107 # define ARMCRC32 108 #endif 109 110 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 111 /* 112 Swap the bytes in a z_word_t to convert between little and big endian. Any 113 self-respecting compiler will optimize this to a single machine byte-swap 114 instruction, if one is available. This assumes that word_t is either 32 bits 115 or 64 bits. 116 */ 117 local z_word_t byte_swap OF((z_word_t word)); 118 119 local z_word_t byte_swap(word) 120 z_word_t word; 121 { 122 # if W == 8 123 return 124 (word & 0xff00000000000000) >> 56 | 125 (word & 0xff000000000000) >> 40 | 126 (word & 0xff0000000000) >> 24 | 127 (word & 0xff00000000) >> 8 | 128 (word & 0xff000000) << 8 | 129 (word & 0xff0000) << 24 | 130 (word & 0xff00) << 40 | 131 (word & 0xff) << 56; 132 # else /* W == 4 */ 133 return 134 (word & 0xff000000) >> 24 | 135 (word & 0xff0000) >> 8 | 136 (word & 0xff00) << 8 | 137 (word & 0xff) << 24; 138 # endif 139 } 140 #endif 141 142 /* CRC polynomial. */ 143 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 144 145 #ifdef DYNAMIC_CRC_TABLE 146 147 local z_crc_t FAR crc_table[256]; 148 local z_crc_t FAR x2n_table[32]; 149 local void make_crc_table OF((void)); 150 #ifdef W 151 local z_word_t FAR crc_big_table[256]; 152 local z_crc_t FAR crc_braid_table[W][256]; 153 local z_word_t FAR crc_braid_big_table[W][256]; 154 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); 155 #endif 156 #ifdef MAKECRCH 157 local void write_table OF((FILE *, const z_crc_t FAR *, int)); 158 local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); 159 local void write_table64 OF((FILE *, const z_word_t FAR *, int)); 160 #endif /* MAKECRCH */ 161 162 /* 163 Define a once() function depending on the availability of atomics. If this is 164 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 165 multiple threads, and if atomics are not available, then get_crc_table() must 166 be called to initialize the tables and must return before any threads are 167 allowed to compute or combine CRCs. 168 */ 169 170 /* Definition of once functionality. */ 171 typedef struct once_s once_t; 172 local void once OF((once_t *, void (*)(void))); 173 174 /* Check for the availability of atomics. */ 175 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 176 !defined(__STDC_NO_ATOMICS__) 177 178 #include <stdatomic.h> 179 180 /* Structure for once(), which must be initialized with ONCE_INIT. */ 181 struct once_s { 182 atomic_flag begun; 183 atomic_int done; 184 }; 185 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 186 187 /* 188 Run the provided init() function exactly once, even if multiple threads 189 invoke once() at the same time. The state must be a once_t initialized with 190 ONCE_INIT. 191 */ 192 local void once(state, init) 193 once_t *state; 194 void (*init)(void); 195 { 196 if (!atomic_load(&state->done)) { 197 if (atomic_flag_test_and_set(&state->begun)) 198 while (!atomic_load(&state->done)) 199 ; 200 else { 201 init(); 202 atomic_store(&state->done, 1); 203 } 204 } 205 } 206 207 #else /* no atomics */ 208 209 /* Structure for once(), which must be initialized with ONCE_INIT. */ 210 struct once_s { 211 volatile int begun; 212 volatile int done; 213 }; 214 #define ONCE_INIT {0, 0} 215 216 /* Test and set. Alas, not atomic, but tries to minimize the period of 217 vulnerability. */ 218 local int test_and_set OF((int volatile *)); 219 local int test_and_set(flag) 220 int volatile *flag; 221 { 222 int was; 223 224 was = *flag; 225 *flag = 1; 226 return was; 227 } 228 229 /* Run the provided init() function once. This is not thread-safe. */ 230 local void once(state, init) 231 once_t *state; 232 void (*init)(void); 233 { 234 if (!state->done) { 235 if (test_and_set(&state->begun)) 236 while (!state->done) 237 ; 238 else { 239 init(); 240 state->done = 1; 241 } 242 } 243 } 244 245 #endif 246 247 /* State for once(). */ 248 local once_t made = ONCE_INIT; 249 250 /* 251 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 252 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 253 254 Polynomials over GF(2) are represented in binary, one bit per coefficient, 255 with the lowest powers in the most significant bit. Then adding polynomials 256 is just exclusive-or, and multiplying a polynomial by x is a right shift by 257 one. If we call the above polynomial p, and represent a byte as the 258 polynomial q, also with the lowest power in the most significant bit (so the 259 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 260 where a mod b means the remainder after dividing a by b. 261 262 This calculation is done using the shift-register method of multiplying and 263 taking the remainder. The register is initialized to zero, and for each 264 incoming bit, x^32 is added mod p to the register if the bit is a one (where 265 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 266 (which is shifting right by one and adding x^32 mod p if the bit shifted out 267 is a one). We start with the highest power (least significant bit) of q and 268 repeat for all eight bits of q. 269 270 The table is simply the CRC of all possible eight bit values. This is all the 271 information needed to generate CRCs on data a byte at a time for all 272 combinations of CRC register values and incoming bytes. 273 */ 274 275 local void make_crc_table() 276 { 277 unsigned i, j, n; 278 z_crc_t p; 279 280 /* initialize the CRC of bytes tables */ 281 for (i = 0; i < 256; i++) { 282 p = i; 283 for (j = 0; j < 8; j++) 284 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 285 crc_table[i] = p; 286 #ifdef W 287 crc_big_table[i] = byte_swap(p); 288 #endif 289 } 290 291 /* initialize the x^2^n mod p(x) table */ 292 p = (z_crc_t)1 << 30; /* x^1 */ 293 x2n_table[0] = p; 294 for (n = 1; n < 32; n++) 295 x2n_table[n] = p = multmodp(p, p); 296 297 #ifdef W 298 /* initialize the braiding tables -- needs x2n_table[] */ 299 braid(crc_braid_table, crc_braid_big_table, N, W); 300 #endif 301 302 #ifdef MAKECRCH 303 { 304 /* 305 The crc32.h header file contains tables for both 32-bit and 64-bit 306 z_word_t's, and so requires a 64-bit type be available. In that case, 307 z_word_t must be defined to be 64-bits. This code then also generates 308 and writes out the tables for the case that z_word_t is 32 bits. 309 */ 310 #if !defined(W) || W != 8 311 # error Need a 64-bit integer type in order to generate crc32.h. 312 #endif 313 FILE *out; 314 int k, n; 315 z_crc_t ltl[8][256]; 316 z_word_t big[8][256]; 317 318 out = fopen("crc32.h", "w"); 319 if (out == NULL) return; 320 321 /* write out little-endian CRC table to crc32.h */ 322 fprintf(out, 323 "/* crc32.h -- tables for rapid CRC calculation\n" 324 " * Generated automatically by crc32.c\n */\n" 325 "\n" 326 "local const z_crc_t FAR crc_table[] = {\n" 327 " "); 328 write_table(out, crc_table, 256); 329 fprintf(out, 330 "};\n"); 331 332 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 333 fprintf(out, 334 "\n" 335 "#ifdef W\n" 336 "\n" 337 "#if W == 8\n" 338 "\n" 339 "local const z_word_t FAR crc_big_table[] = {\n" 340 " "); 341 write_table64(out, crc_big_table, 256); 342 fprintf(out, 343 "};\n"); 344 345 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 346 fprintf(out, 347 "\n" 348 "#else /* W == 4 */\n" 349 "\n" 350 "local const z_word_t FAR crc_big_table[] = {\n" 351 " "); 352 write_table32hi(out, crc_big_table, 256); 353 fprintf(out, 354 "};\n" 355 "\n" 356 "#endif\n"); 357 358 /* write out braid tables for each value of N */ 359 for (n = 1; n <= 6; n++) { 360 fprintf(out, 361 "\n" 362 "#if N == %d\n", n); 363 364 /* compute braid tables for this N and 64-bit word_t */ 365 braid(ltl, big, n, 8); 366 367 /* write out braid tables for 64-bit z_word_t to crc32.h */ 368 fprintf(out, 369 "\n" 370 "#if W == 8\n" 371 "\n" 372 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 373 for (k = 0; k < 8; k++) { 374 fprintf(out, " {"); 375 write_table(out, ltl[k], 256); 376 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 377 } 378 fprintf(out, 379 "};\n" 380 "\n" 381 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 382 for (k = 0; k < 8; k++) { 383 fprintf(out, " {"); 384 write_table64(out, big[k], 256); 385 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 386 } 387 fprintf(out, 388 "};\n"); 389 390 /* compute braid tables for this N and 32-bit word_t */ 391 braid(ltl, big, n, 4); 392 393 /* write out braid tables for 32-bit z_word_t to crc32.h */ 394 fprintf(out, 395 "\n" 396 "#else /* W == 4 */\n" 397 "\n" 398 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 399 for (k = 0; k < 4; k++) { 400 fprintf(out, " {"); 401 write_table(out, ltl[k], 256); 402 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 403 } 404 fprintf(out, 405 "};\n" 406 "\n" 407 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 408 for (k = 0; k < 4; k++) { 409 fprintf(out, " {"); 410 write_table32hi(out, big[k], 256); 411 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 412 } 413 fprintf(out, 414 "};\n" 415 "\n" 416 "#endif\n" 417 "\n" 418 "#endif\n"); 419 } 420 fprintf(out, 421 "\n" 422 "#endif\n"); 423 424 /* write out zeros operator table to crc32.h */ 425 fprintf(out, 426 "\n" 427 "local const z_crc_t FAR x2n_table[] = {\n" 428 " "); 429 write_table(out, x2n_table, 32); 430 fprintf(out, 431 "};\n"); 432 fclose(out); 433 } 434 #endif /* MAKECRCH */ 435 } 436 437 #ifdef MAKECRCH 438 439 /* 440 Write the 32-bit values in table[0..k-1] to out, five per line in 441 hexadecimal separated by commas. 442 */ 443 local void write_table(out, table, k) 444 FILE *out; 445 const z_crc_t FAR *table; 446 int k; 447 { 448 int n; 449 450 for (n = 0; n < k; n++) 451 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 452 (unsigned long)(table[n]), 453 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 454 } 455 456 /* 457 Write the high 32-bits of each value in table[0..k-1] to out, five per line 458 in hexadecimal separated by commas. 459 */ 460 local void write_table32hi(out, table, k) 461 FILE *out; 462 const z_word_t FAR *table; 463 int k; 464 { 465 int n; 466 467 for (n = 0; n < k; n++) 468 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 469 (unsigned long)(table[n] >> 32), 470 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 471 } 472 473 /* 474 Write the 64-bit values in table[0..k-1] to out, three per line in 475 hexadecimal separated by commas. This assumes that if there is a 64-bit 476 type, then there is also a long long integer type, and it is at least 64 477 bits. If not, then the type cast and format string can be adjusted 478 accordingly. 479 */ 480 local void write_table64(out, table, k) 481 FILE *out; 482 const z_word_t FAR *table; 483 int k; 484 { 485 int n; 486 487 for (n = 0; n < k; n++) 488 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 489 (unsigned long long)(table[n]), 490 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 491 } 492 493 /* Actually do the deed. */ 494 int main() 495 { 496 make_crc_table(); 497 return 0; 498 } 499 500 #endif /* MAKECRCH */ 501 502 #ifdef W 503 /* 504 Generate the little and big-endian braid tables for the given n and z_word_t 505 size w. Each array must have room for w blocks of 256 elements. 506 */ 507 local void braid(ltl, big, n, w) 508 z_crc_t ltl[][256]; 509 z_word_t big[][256]; 510 int n; 511 int w; 512 { 513 int k; 514 z_crc_t i, p, q; 515 for (k = 0; k < w; k++) { 516 p = x2nmodp((n * w + 3 - k) << 3, 0); 517 ltl[k][0] = 0; 518 big[w - 1 - k][0] = 0; 519 for (i = 1; i < 256; i++) { 520 ltl[k][i] = q = multmodp(i << 24, p); 521 big[w - 1 - k][i] = byte_swap(q); 522 } 523 } 524 } 525 #endif 526 527 #else /* !DYNAMIC_CRC_TABLE */ 528 /* ======================================================================== 529 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 530 * of x for combining CRC-32s, all made by make_crc_table(). 531 */ 532 #include "crc32.h" 533 #endif /* DYNAMIC_CRC_TABLE */ 534 535 /* ======================================================================== 536 * Routines used for CRC calculation. Some are also required for the table 537 * generation above. 538 */ 539 540 /* 541 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 542 reflected. For speed, this requires that a not be zero. 543 */ 544 local z_crc_t multmodp(a, b) 545 z_crc_t a; 546 z_crc_t b; 547 { 548 z_crc_t m, p; 549 550 m = (z_crc_t)1 << 31; 551 p = 0; 552 for (;;) { 553 if (a & m) { 554 p ^= b; 555 if ((a & (m - 1)) == 0) 556 break; 557 } 558 m >>= 1; 559 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 560 } 561 return p; 562 } 563 564 /* 565 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 566 initialized. 567 */ 568 local z_crc_t x2nmodp(n, k) 569 z_off64_t n; 570 unsigned k; 571 { 572 z_crc_t p; 573 574 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 575 while (n) { 576 if (n & 1) 577 p = multmodp(x2n_table[k & 31], p); 578 n >>= 1; 579 k++; 580 } 581 return p; 582 } 583 584 /* ========================================================================= 585 * This function can be used by asm versions of crc32(), and to force the 586 * generation of the CRC tables in a threaded application. 587 */ 588 const z_crc_t FAR * ZEXPORT get_crc_table() 589 { 590 #ifdef DYNAMIC_CRC_TABLE 591 once(&made, make_crc_table); 592 #endif /* DYNAMIC_CRC_TABLE */ 593 return (const z_crc_t FAR *)crc_table; 594 } 595 596 /* ========================================================================= 597 * Use ARM machine instructions if available. This will compute the CRC about 598 * ten times faster than the braided calculation. This code does not check for 599 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 600 * only be defined if the compilation specifies an ARM processor architecture 601 * that has the instructions. For example, compiling with -march=armv8.1-a or 602 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 603 * instructions. 604 */ 605 #ifdef ARMCRC32 606 607 /* 608 Constants empirically determined to maximize speed. These values are from 609 measurements on a Cortex-A57. Your mileage may vary. 610 */ 611 #define Z_BATCH 3990 /* number of words in a batch */ 612 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 613 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 614 615 unsigned long ZEXPORT crc32_z(crc, buf, len) 616 unsigned long crc; 617 const unsigned char FAR *buf; 618 z_size_t len; 619 { 620 z_crc_t val; 621 z_word_t crc1, crc2; 622 const z_word_t *word; 623 z_word_t val0, val1, val2; 624 z_size_t last, last2, i; 625 z_size_t num; 626 627 /* Return initial CRC, if requested. */ 628 if (buf == Z_NULL) return 0; 629 630 #ifdef DYNAMIC_CRC_TABLE 631 once(&made, make_crc_table); 632 #endif /* DYNAMIC_CRC_TABLE */ 633 634 /* Pre-condition the CRC */ 635 crc = (~crc) & 0xffffffff; 636 637 /* Compute the CRC up to a word boundary. */ 638 while (len && ((z_size_t)buf & 7) != 0) { 639 len--; 640 val = *buf++; 641 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 642 } 643 644 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 645 word = (z_word_t const *)buf; 646 num = len >> 3; 647 len &= 7; 648 649 /* Do three interleaved CRCs to realize the throughput of one crc32x 650 instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three 651 CRCs are combined into a single CRC after each set of batches. */ 652 while (num >= 3 * Z_BATCH) { 653 crc1 = 0; 654 crc2 = 0; 655 for (i = 0; i < Z_BATCH; i++) { 656 val0 = word[i]; 657 val1 = word[i + Z_BATCH]; 658 val2 = word[i + 2 * Z_BATCH]; 659 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 660 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 661 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 662 } 663 word += 3 * Z_BATCH; 664 num -= 3 * Z_BATCH; 665 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 666 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 667 } 668 669 /* Do one last smaller batch with the remaining words, if there are enough 670 to pay for the combination of CRCs. */ 671 last = num / 3; 672 if (last >= Z_BATCH_MIN) { 673 last2 = last << 1; 674 crc1 = 0; 675 crc2 = 0; 676 for (i = 0; i < last; i++) { 677 val0 = word[i]; 678 val1 = word[i + last]; 679 val2 = word[i + last2]; 680 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 681 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 682 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 683 } 684 word += 3 * last; 685 num -= 3 * last; 686 val = x2nmodp(last, 6); 687 crc = multmodp(val, crc) ^ crc1; 688 crc = multmodp(val, crc) ^ crc2; 689 } 690 691 /* Compute the CRC on any remaining words. */ 692 for (i = 0; i < num; i++) { 693 val0 = word[i]; 694 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 695 } 696 word += num; 697 698 /* Complete the CRC on any remaining bytes. */ 699 buf = (const unsigned char FAR *)word; 700 while (len) { 701 len--; 702 val = *buf++; 703 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 704 } 705 706 /* Return the CRC, post-conditioned. */ 707 return crc ^ 0xffffffff; 708 } 709 710 #else 711 712 #ifdef W 713 714 /* 715 Return the CRC of the W bytes in the word_t data, taking the 716 least-significant byte of the word as the first byte of data, without any pre 717 or post conditioning. This is used to combine the CRCs of each braid. 718 */ 719 local z_crc_t crc_word OF((z_word_t data)); 720 721 local z_crc_t crc_word(data) 722 z_word_t data; 723 { 724 int k; 725 for (k = 0; k < W; k++) 726 data = (data >> 8) ^ crc_table[data & 0xff]; 727 return (z_crc_t)data; 728 } 729 730 local z_word_t crc_word_big OF((z_word_t data)); 731 732 local z_word_t crc_word_big(data) 733 z_word_t data; 734 { 735 int k; 736 for (k = 0; k < W; k++) 737 data = (data << 8) ^ 738 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 739 return data; 740 } 741 742 #endif 743 744 /* ========================================================================= */ 745 unsigned long ZEXPORT crc32_z(crc, buf, len) 746 unsigned long crc; 747 const unsigned char FAR *buf; 748 z_size_t len; 749 { 750 /* Return initial CRC, if requested. */ 751 if (buf == Z_NULL) return 0; 752 753 #ifdef DYNAMIC_CRC_TABLE 754 once(&made, make_crc_table); 755 #endif /* DYNAMIC_CRC_TABLE */ 756 757 /* Pre-condition the CRC */ 758 crc = (~crc) & 0xffffffff; 759 760 #ifdef W 761 762 /* If provided enough bytes, do a braided CRC calculation. */ 763 if (len >= N * W + W - 1) { 764 z_size_t blks; 765 z_word_t const *words; 766 unsigned endian; 767 int k; 768 769 /* Compute the CRC up to a z_word_t boundary. */ 770 while (len && ((z_size_t)buf & (W - 1)) != 0) { 771 len--; 772 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 773 } 774 775 /* Compute the CRC on as many N z_word_t blocks as are available. */ 776 blks = len / (N * W); 777 len -= blks * N * W; 778 words = (z_word_t const *)buf; 779 780 /* Do endian check at execution time instead of compile time, since ARM 781 processors can change the endianess at execution time. If the 782 compiler knows what the endianess will be, it can optimize out the 783 check and the unused branch. */ 784 endian = 1; 785 if (*(unsigned char *)&endian) { 786 /* Little endian. */ 787 788 z_crc_t crc0; 789 z_word_t word0; 790 #if N > 1 791 z_crc_t crc1; 792 z_word_t word1; 793 #if N > 2 794 z_crc_t crc2; 795 z_word_t word2; 796 #if N > 3 797 z_crc_t crc3; 798 z_word_t word3; 799 #if N > 4 800 z_crc_t crc4; 801 z_word_t word4; 802 #if N > 5 803 z_crc_t crc5; 804 z_word_t word5; 805 #endif 806 #endif 807 #endif 808 #endif 809 #endif 810 811 /* Initialize the CRC for each braid. */ 812 crc0 = crc; 813 #if N > 1 814 crc1 = 0; 815 #if N > 2 816 crc2 = 0; 817 #if N > 3 818 crc3 = 0; 819 #if N > 4 820 crc4 = 0; 821 #if N > 5 822 crc5 = 0; 823 #endif 824 #endif 825 #endif 826 #endif 827 #endif 828 829 /* 830 Process the first blks-1 blocks, computing the CRCs on each braid 831 independently. 832 */ 833 while (--blks) { 834 /* Load the word for each braid into registers. */ 835 word0 = crc0 ^ words[0]; 836 #if N > 1 837 word1 = crc1 ^ words[1]; 838 #if N > 2 839 word2 = crc2 ^ words[2]; 840 #if N > 3 841 word3 = crc3 ^ words[3]; 842 #if N > 4 843 word4 = crc4 ^ words[4]; 844 #if N > 5 845 word5 = crc5 ^ words[5]; 846 #endif 847 #endif 848 #endif 849 #endif 850 #endif 851 words += N; 852 853 /* Compute and update the CRC for each word. The loop should 854 get unrolled. */ 855 crc0 = crc_braid_table[0][word0 & 0xff]; 856 #if N > 1 857 crc1 = crc_braid_table[0][word1 & 0xff]; 858 #if N > 2 859 crc2 = crc_braid_table[0][word2 & 0xff]; 860 #if N > 3 861 crc3 = crc_braid_table[0][word3 & 0xff]; 862 #if N > 4 863 crc4 = crc_braid_table[0][word4 & 0xff]; 864 #if N > 5 865 crc5 = crc_braid_table[0][word5 & 0xff]; 866 #endif 867 #endif 868 #endif 869 #endif 870 #endif 871 for (k = 1; k < W; k++) { 872 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 873 #if N > 1 874 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 875 #if N > 2 876 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 877 #if N > 3 878 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 879 #if N > 4 880 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 881 #if N > 5 882 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 883 #endif 884 #endif 885 #endif 886 #endif 887 #endif 888 } 889 } 890 891 /* 892 Process the last block, combining the CRCs of the N braids at the 893 same time. 894 */ 895 crc = crc_word(crc0 ^ words[0]); 896 #if N > 1 897 crc = crc_word(crc1 ^ words[1] ^ crc); 898 #if N > 2 899 crc = crc_word(crc2 ^ words[2] ^ crc); 900 #if N > 3 901 crc = crc_word(crc3 ^ words[3] ^ crc); 902 #if N > 4 903 crc = crc_word(crc4 ^ words[4] ^ crc); 904 #if N > 5 905 crc = crc_word(crc5 ^ words[5] ^ crc); 906 #endif 907 #endif 908 #endif 909 #endif 910 #endif 911 words += N; 912 } 913 else { 914 /* Big endian. */ 915 916 z_word_t crc0, word0, comb; 917 #if N > 1 918 z_word_t crc1, word1; 919 #if N > 2 920 z_word_t crc2, word2; 921 #if N > 3 922 z_word_t crc3, word3; 923 #if N > 4 924 z_word_t crc4, word4; 925 #if N > 5 926 z_word_t crc5, word5; 927 #endif 928 #endif 929 #endif 930 #endif 931 #endif 932 933 /* Initialize the CRC for each braid. */ 934 crc0 = byte_swap(crc); 935 #if N > 1 936 crc1 = 0; 937 #if N > 2 938 crc2 = 0; 939 #if N > 3 940 crc3 = 0; 941 #if N > 4 942 crc4 = 0; 943 #if N > 5 944 crc5 = 0; 945 #endif 946 #endif 947 #endif 948 #endif 949 #endif 950 951 /* 952 Process the first blks-1 blocks, computing the CRCs on each braid 953 independently. 954 */ 955 while (--blks) { 956 /* Load the word for each braid into registers. */ 957 word0 = crc0 ^ words[0]; 958 #if N > 1 959 word1 = crc1 ^ words[1]; 960 #if N > 2 961 word2 = crc2 ^ words[2]; 962 #if N > 3 963 word3 = crc3 ^ words[3]; 964 #if N > 4 965 word4 = crc4 ^ words[4]; 966 #if N > 5 967 word5 = crc5 ^ words[5]; 968 #endif 969 #endif 970 #endif 971 #endif 972 #endif 973 words += N; 974 975 /* Compute and update the CRC for each word. The loop should 976 get unrolled. */ 977 crc0 = crc_braid_big_table[0][word0 & 0xff]; 978 #if N > 1 979 crc1 = crc_braid_big_table[0][word1 & 0xff]; 980 #if N > 2 981 crc2 = crc_braid_big_table[0][word2 & 0xff]; 982 #if N > 3 983 crc3 = crc_braid_big_table[0][word3 & 0xff]; 984 #if N > 4 985 crc4 = crc_braid_big_table[0][word4 & 0xff]; 986 #if N > 5 987 crc5 = crc_braid_big_table[0][word5 & 0xff]; 988 #endif 989 #endif 990 #endif 991 #endif 992 #endif 993 for (k = 1; k < W; k++) { 994 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 995 #if N > 1 996 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 997 #if N > 2 998 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 999 #if N > 3 1000 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 1001 #if N > 4 1002 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 1003 #if N > 5 1004 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 1005 #endif 1006 #endif 1007 #endif 1008 #endif 1009 #endif 1010 } 1011 } 1012 1013 /* 1014 Process the last block, combining the CRCs of the N braids at the 1015 same time. 1016 */ 1017 comb = crc_word_big(crc0 ^ words[0]); 1018 #if N > 1 1019 comb = crc_word_big(crc1 ^ words[1] ^ comb); 1020 #if N > 2 1021 comb = crc_word_big(crc2 ^ words[2] ^ comb); 1022 #if N > 3 1023 comb = crc_word_big(crc3 ^ words[3] ^ comb); 1024 #if N > 4 1025 comb = crc_word_big(crc4 ^ words[4] ^ comb); 1026 #if N > 5 1027 comb = crc_word_big(crc5 ^ words[5] ^ comb); 1028 #endif 1029 #endif 1030 #endif 1031 #endif 1032 #endif 1033 words += N; 1034 crc = byte_swap(comb); 1035 } 1036 1037 /* 1038 Update the pointer to the remaining bytes to process. 1039 */ 1040 buf = (unsigned char const *)words; 1041 } 1042 1043 #endif /* W */ 1044 1045 /* Complete the computation of the CRC on any remaining bytes. */ 1046 while (len >= 8) { 1047 len -= 8; 1048 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1049 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1050 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1051 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1053 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1054 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1055 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1056 } 1057 while (len) { 1058 len--; 1059 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1060 } 1061 1062 /* Return the CRC, post-conditioned. */ 1063 return crc ^ 0xffffffff; 1064 } 1065 1066 #endif 1067 1068 /* ========================================================================= */ 1069 unsigned long ZEXPORT crc32(crc, buf, len) 1070 unsigned long crc; 1071 const unsigned char FAR *buf; 1072 uInt len; 1073 { 1074 return crc32_z(crc, buf, len); 1075 } 1076 1077 /* ========================================================================= */ 1078 uLong ZEXPORT crc32_combine64(crc1, crc2, len2) 1079 uLong crc1; 1080 uLong crc2; 1081 z_off64_t len2; 1082 { 1083 #ifdef DYNAMIC_CRC_TABLE 1084 once(&made, make_crc_table); 1085 #endif /* DYNAMIC_CRC_TABLE */ 1086 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1087 } 1088 1089 /* ========================================================================= */ 1090 uLong ZEXPORT crc32_combine(crc1, crc2, len2) 1091 uLong crc1; 1092 uLong crc2; 1093 z_off_t len2; 1094 { 1095 return crc32_combine64(crc1, crc2, len2); 1096 } 1097 1098 /* ========================================================================= */ 1099 uLong ZEXPORT crc32_combine_gen64(len2) 1100 z_off64_t len2; 1101 { 1102 #ifdef DYNAMIC_CRC_TABLE 1103 once(&made, make_crc_table); 1104 #endif /* DYNAMIC_CRC_TABLE */ 1105 return x2nmodp(len2, 3); 1106 } 1107 1108 /* ========================================================================= */ 1109 uLong ZEXPORT crc32_combine_gen(len2) 1110 z_off_t len2; 1111 { 1112 return crc32_combine_gen64(len2); 1113 } 1114 1115 /* ========================================================================= */ 1116 uLong crc32_combine_op(crc1, crc2, op) 1117 uLong crc1; 1118 uLong crc2; 1119 uLong op; 1120 { 1121 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1122 } 1123