1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream 2cd882207SXin LI * Copyright (C) 1995-2022 Mark Adler 3c9083b85SXin LI * For conditions of distribution and use, see copyright notice in zlib.h 4c9083b85SXin LI * 5cd882207SXin LI * This interleaved implementation of a CRC makes use of pipelined multiple 6cd882207SXin LI * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7cd882207SXin LI * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8c9083b85SXin LI */ 9c9083b85SXin LI 10c9083b85SXin LI /* @(#) $Id$ */ 11c9083b85SXin LI 12c9083b85SXin LI /* 13c9083b85SXin LI Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 14c9083b85SXin LI protection on the static variables used to control the first-use generation 15c9083b85SXin LI of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 16c9083b85SXin LI first call get_crc_table() to initialize the tables before allowing more than 17c9083b85SXin LI one thread to use crc32(). 18c9083b85SXin LI 19cd882207SXin LI MAKECRCH can be #defined to write out crc32.h. A main() routine is also 20cd882207SXin LI produced, so that this one source file can be compiled to an executable. 21c9083b85SXin LI */ 22c9083b85SXin LI 23c9083b85SXin LI #ifdef MAKECRCH 24c9083b85SXin LI # include <stdio.h> 25c9083b85SXin LI # ifndef DYNAMIC_CRC_TABLE 26c9083b85SXin LI # define DYNAMIC_CRC_TABLE 27c9083b85SXin LI # endif /* !DYNAMIC_CRC_TABLE */ 28c9083b85SXin LI #endif /* MAKECRCH */ 29c9083b85SXin LI 30cd882207SXin LI #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 31c9083b85SXin LI 32cd882207SXin LI /* 33cd882207SXin LI A CRC of a message is computed on N braids of words in the message, where 34cd882207SXin LI each word consists of W bytes (4 or 8). If N is 3, for example, then three 35cd882207SXin LI running sparse CRCs are calculated respectively on each braid, at these 36cd882207SXin LI indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 37cd882207SXin LI This is done starting at a word boundary, and continues until as many blocks 38cd882207SXin LI of N * W bytes as are available have been processed. The results are combined 39cd882207SXin LI into a single CRC at the end. For this code, N must be in the range 1..6 and 40cd882207SXin LI W must be 4 or 8. The upper limit on N can be increased if desired by adding 41cd882207SXin LI more #if blocks, extending the patterns apparent in the code. In addition, 42cd882207SXin LI crc32.h would need to be regenerated, if the maximum N value is increased. 43cd882207SXin LI 44cd882207SXin LI N and W are chosen empirically by benchmarking the execution time on a given 45cd882207SXin LI processor. The choices for N and W below were based on testing on Intel Kaby 46cd882207SXin LI Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 47cd882207SXin LI Octeon II processors. The Intel, AMD, and ARM processors were all fastest 48cd882207SXin LI with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 49cd882207SXin LI They were all tested with either gcc or clang, all using the -O3 optimization 50cd882207SXin LI level. Your mileage may vary. 51cd882207SXin LI */ 52cd882207SXin LI 53cd882207SXin LI /* Define N */ 54cd882207SXin LI #ifdef Z_TESTN 55cd882207SXin LI # define N Z_TESTN 56c9083b85SXin LI #else 57cd882207SXin LI # define N 5 58cd882207SXin LI #endif 59cd882207SXin LI #if N < 1 || N > 6 60cd882207SXin LI # error N must be in 1..6 61cd882207SXin LI #endif 62c9083b85SXin LI 63cd882207SXin LI /* 64cd882207SXin LI z_crc_t must be at least 32 bits. z_word_t must be at least as long as 65cd882207SXin LI z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 66cd882207SXin LI that bytes are eight bits. 67cd882207SXin LI */ 68c9083b85SXin LI 69cd882207SXin LI /* 70cd882207SXin LI Define W and the associated z_word_t type. If W is not defined, then a 71cd882207SXin LI braided calculation is not used, and the associated tables and code are not 72cd882207SXin LI compiled. 73cd882207SXin LI */ 74cd882207SXin LI #ifdef Z_TESTW 75cd882207SXin LI # if Z_TESTW-1 != -1 76cd882207SXin LI # define W Z_TESTW 77cd882207SXin LI # endif 78cd882207SXin LI #else 79cd882207SXin LI # ifdef MAKECRCH 80cd882207SXin LI # define W 8 /* required for MAKECRCH */ 81cd882207SXin LI # else 82cd882207SXin LI # if defined(__x86_64__) || defined(__aarch64__) 83cd882207SXin LI # define W 8 84cd882207SXin LI # else 85cd882207SXin LI # define W 4 86cd882207SXin LI # endif 87cd882207SXin LI # endif 88cd882207SXin LI #endif 89cd882207SXin LI #ifdef W 90cd882207SXin LI # if W == 8 && defined(Z_U8) 91cd882207SXin LI typedef Z_U8 z_word_t; 92cd882207SXin LI # elif defined(Z_U4) 93cd882207SXin LI # undef W 94cd882207SXin LI # define W 4 95cd882207SXin LI typedef Z_U4 z_word_t; 96cd882207SXin LI # else 97cd882207SXin LI # undef W 98cd882207SXin LI # endif 99cd882207SXin LI #endif 100cd882207SXin LI 101*e37bb444SXin LI /* If available, use the ARM processor CRC32 instruction. */ 102*e37bb444SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 103*e37bb444SXin LI # define ARMCRC32 104*e37bb444SXin LI #endif 105*e37bb444SXin LI 106cd882207SXin LI /* Local functions. */ 107cd882207SXin LI local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); 108cd882207SXin LI local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); 109cd882207SXin LI 110*e37bb444SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 111*e37bb444SXin LI local z_word_t byte_swap OF((z_word_t word)); 112*e37bb444SXin LI #endif 113*e37bb444SXin LI 114*e37bb444SXin LI #if defined(W) && !defined(ARMCRC32) 115*e37bb444SXin LI local z_crc_t crc_word OF((z_word_t data)); 116*e37bb444SXin LI local z_word_t crc_word_big OF((z_word_t data)); 117cd882207SXin LI #endif 118cd882207SXin LI 119cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 120cd882207SXin LI /* 121cd882207SXin LI Swap the bytes in a z_word_t to convert between little and big endian. Any 122cd882207SXin LI self-respecting compiler will optimize this to a single machine byte-swap 123cd882207SXin LI instruction, if one is available. This assumes that word_t is either 32 bits 124cd882207SXin LI or 64 bits. 125cd882207SXin LI */ 1267550e242SJohn Baldwin local z_word_t byte_swap OF((z_word_t word)); 1277550e242SJohn Baldwin 128cd882207SXin LI local z_word_t byte_swap(word) 129cd882207SXin LI z_word_t word; 130cd882207SXin LI { 131cd882207SXin LI # if W == 8 132cd882207SXin LI return 133cd882207SXin LI (word & 0xff00000000000000) >> 56 | 134cd882207SXin LI (word & 0xff000000000000) >> 40 | 135cd882207SXin LI (word & 0xff0000000000) >> 24 | 136cd882207SXin LI (word & 0xff00000000) >> 8 | 137cd882207SXin LI (word & 0xff000000) << 8 | 138cd882207SXin LI (word & 0xff0000) << 24 | 139cd882207SXin LI (word & 0xff00) << 40 | 140cd882207SXin LI (word & 0xff) << 56; 141cd882207SXin LI # else /* W == 4 */ 142cd882207SXin LI return 143cd882207SXin LI (word & 0xff000000) >> 24 | 144cd882207SXin LI (word & 0xff0000) >> 8 | 145cd882207SXin LI (word & 0xff00) << 8 | 146cd882207SXin LI (word & 0xff) << 24; 147cd882207SXin LI # endif 148cd882207SXin LI } 149cd882207SXin LI #endif 150cd882207SXin LI 151cd882207SXin LI /* CRC polynomial. */ 152cd882207SXin LI #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 153c9083b85SXin LI 154c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 155c9083b85SXin LI 156cd882207SXin LI local z_crc_t FAR crc_table[256]; 157cd882207SXin LI local z_crc_t FAR x2n_table[32]; 158c9083b85SXin LI local void make_crc_table OF((void)); 159cd882207SXin LI #ifdef W 160cd882207SXin LI local z_word_t FAR crc_big_table[256]; 161cd882207SXin LI local z_crc_t FAR crc_braid_table[W][256]; 162cd882207SXin LI local z_word_t FAR crc_braid_big_table[W][256]; 163cd882207SXin LI local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); 164cd882207SXin LI #endif 165c9083b85SXin LI #ifdef MAKECRCH 166cd882207SXin LI local void write_table OF((FILE *, const z_crc_t FAR *, int)); 167cd882207SXin LI local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); 168cd882207SXin LI local void write_table64 OF((FILE *, const z_word_t FAR *, int)); 169c9083b85SXin LI #endif /* MAKECRCH */ 170cd882207SXin LI 171cd882207SXin LI /* 172cd882207SXin LI Define a once() function depending on the availability of atomics. If this is 173cd882207SXin LI compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 174cd882207SXin LI multiple threads, and if atomics are not available, then get_crc_table() must 175cd882207SXin LI be called to initialize the tables and must return before any threads are 176cd882207SXin LI allowed to compute or combine CRCs. 177cd882207SXin LI */ 178cd882207SXin LI 179cd882207SXin LI /* Definition of once functionality. */ 180cd882207SXin LI typedef struct once_s once_t; 181cd882207SXin LI local void once OF((once_t *, void (*)(void))); 182cd882207SXin LI 183cd882207SXin LI /* Check for the availability of atomics. */ 184cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 185cd882207SXin LI !defined(__STDC_NO_ATOMICS__) 186cd882207SXin LI 187cd882207SXin LI #include <stdatomic.h> 188cd882207SXin LI 189cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */ 190cd882207SXin LI struct once_s { 191cd882207SXin LI atomic_flag begun; 192cd882207SXin LI atomic_int done; 193cd882207SXin LI }; 194cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 195cd882207SXin LI 196cd882207SXin LI /* 197cd882207SXin LI Run the provided init() function exactly once, even if multiple threads 198cd882207SXin LI invoke once() at the same time. The state must be a once_t initialized with 199cd882207SXin LI ONCE_INIT. 200cd882207SXin LI */ 201cd882207SXin LI local void once(state, init) 202cd882207SXin LI once_t *state; 203cd882207SXin LI void (*init)(void); 204cd882207SXin LI { 205cd882207SXin LI if (!atomic_load(&state->done)) { 206cd882207SXin LI if (atomic_flag_test_and_set(&state->begun)) 207cd882207SXin LI while (!atomic_load(&state->done)) 208cd882207SXin LI ; 209cd882207SXin LI else { 210cd882207SXin LI init(); 211cd882207SXin LI atomic_store(&state->done, 1); 212cd882207SXin LI } 213cd882207SXin LI } 214cd882207SXin LI } 215cd882207SXin LI 216cd882207SXin LI #else /* no atomics */ 217cd882207SXin LI 218cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */ 219cd882207SXin LI struct once_s { 220cd882207SXin LI volatile int begun; 221cd882207SXin LI volatile int done; 222cd882207SXin LI }; 223cd882207SXin LI #define ONCE_INIT {0, 0} 224cd882207SXin LI 225cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of 226cd882207SXin LI vulnerability. */ 227cd882207SXin LI local int test_and_set OF((int volatile *)); 228cd882207SXin LI local int test_and_set(flag) 229cd882207SXin LI int volatile *flag; 230cd882207SXin LI { 231cd882207SXin LI int was; 232cd882207SXin LI 233cd882207SXin LI was = *flag; 234cd882207SXin LI *flag = 1; 235cd882207SXin LI return was; 236cd882207SXin LI } 237cd882207SXin LI 238cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */ 239cd882207SXin LI local void once(state, init) 240cd882207SXin LI once_t *state; 241cd882207SXin LI void (*init)(void); 242cd882207SXin LI { 243cd882207SXin LI if (!state->done) { 244cd882207SXin LI if (test_and_set(&state->begun)) 245cd882207SXin LI while (!state->done) 246cd882207SXin LI ; 247cd882207SXin LI else { 248cd882207SXin LI init(); 249cd882207SXin LI state->done = 1; 250cd882207SXin LI } 251cd882207SXin LI } 252cd882207SXin LI } 253cd882207SXin LI 254cd882207SXin LI #endif 255cd882207SXin LI 256cd882207SXin LI /* State for once(). */ 257cd882207SXin LI local once_t made = ONCE_INIT; 258cd882207SXin LI 259c9083b85SXin LI /* 260c9083b85SXin LI Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 261c9083b85SXin LI x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 262c9083b85SXin LI 263c9083b85SXin LI Polynomials over GF(2) are represented in binary, one bit per coefficient, 264c9083b85SXin LI with the lowest powers in the most significant bit. Then adding polynomials 265c9083b85SXin LI is just exclusive-or, and multiplying a polynomial by x is a right shift by 266c9083b85SXin LI one. If we call the above polynomial p, and represent a byte as the 267c9083b85SXin LI polynomial q, also with the lowest power in the most significant bit (so the 268cd882207SXin LI byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 269c9083b85SXin LI where a mod b means the remainder after dividing a by b. 270c9083b85SXin LI 271c9083b85SXin LI This calculation is done using the shift-register method of multiplying and 272c9083b85SXin LI taking the remainder. The register is initialized to zero, and for each 273c9083b85SXin LI incoming bit, x^32 is added mod p to the register if the bit is a one (where 274cd882207SXin LI x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 275cd882207SXin LI (which is shifting right by one and adding x^32 mod p if the bit shifted out 276cd882207SXin LI is a one). We start with the highest power (least significant bit) of q and 277cd882207SXin LI repeat for all eight bits of q. 278c9083b85SXin LI 279cd882207SXin LI The table is simply the CRC of all possible eight bit values. This is all the 280cd882207SXin LI information needed to generate CRCs on data a byte at a time for all 281cd882207SXin LI combinations of CRC register values and incoming bytes. 282c9083b85SXin LI */ 283cd882207SXin LI 284c9083b85SXin LI local void make_crc_table() 285c9083b85SXin LI { 286cd882207SXin LI unsigned i, j, n; 287cd882207SXin LI z_crc_t p; 288c9083b85SXin LI 289cd882207SXin LI /* initialize the CRC of bytes tables */ 290cd882207SXin LI for (i = 0; i < 256; i++) { 291cd882207SXin LI p = i; 292cd882207SXin LI for (j = 0; j < 8; j++) 293cd882207SXin LI p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 294cd882207SXin LI crc_table[i] = p; 295cd882207SXin LI #ifdef W 296cd882207SXin LI crc_big_table[i] = byte_swap(p); 297cd882207SXin LI #endif 298c9083b85SXin LI } 299c9083b85SXin LI 300cd882207SXin LI /* initialize the x^2^n mod p(x) table */ 301cd882207SXin LI p = (z_crc_t)1 << 30; /* x^1 */ 302cd882207SXin LI x2n_table[0] = p; 303cd882207SXin LI for (n = 1; n < 32; n++) 304cd882207SXin LI x2n_table[n] = p = multmodp(p, p); 305c9083b85SXin LI 306cd882207SXin LI #ifdef W 307cd882207SXin LI /* initialize the braiding tables -- needs x2n_table[] */ 308cd882207SXin LI braid(crc_braid_table, crc_braid_big_table, N, W); 309cd882207SXin LI #endif 310c9083b85SXin LI 311c9083b85SXin LI #ifdef MAKECRCH 312c9083b85SXin LI { 313cd882207SXin LI /* 314cd882207SXin LI The crc32.h header file contains tables for both 32-bit and 64-bit 315cd882207SXin LI z_word_t's, and so requires a 64-bit type be available. In that case, 316cd882207SXin LI z_word_t must be defined to be 64-bits. This code then also generates 317cd882207SXin LI and writes out the tables for the case that z_word_t is 32 bits. 318cd882207SXin LI */ 319cd882207SXin LI #if !defined(W) || W != 8 320cd882207SXin LI # error Need a 64-bit integer type in order to generate crc32.h. 321cd882207SXin LI #endif 322c9083b85SXin LI FILE *out; 323cd882207SXin LI int k, n; 324cd882207SXin LI z_crc_t ltl[8][256]; 325cd882207SXin LI z_word_t big[8][256]; 326c9083b85SXin LI 327c9083b85SXin LI out = fopen("crc32.h", "w"); 328c9083b85SXin LI if (out == NULL) return; 329cd882207SXin LI 330cd882207SXin LI /* write out little-endian CRC table to crc32.h */ 331cd882207SXin LI fprintf(out, 332cd882207SXin LI "/* crc32.h -- tables for rapid CRC calculation\n" 333cd882207SXin LI " * Generated automatically by crc32.c\n */\n" 334cd882207SXin LI "\n" 335cd882207SXin LI "local const z_crc_t FAR crc_table[] = {\n" 336cd882207SXin LI " "); 337cd882207SXin LI write_table(out, crc_table, 256); 338cd882207SXin LI fprintf(out, 339cd882207SXin LI "};\n"); 340cd882207SXin LI 341cd882207SXin LI /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 342cd882207SXin LI fprintf(out, 343cd882207SXin LI "\n" 344cd882207SXin LI "#ifdef W\n" 345cd882207SXin LI "\n" 346cd882207SXin LI "#if W == 8\n" 347cd882207SXin LI "\n" 348cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n" 349cd882207SXin LI " "); 350cd882207SXin LI write_table64(out, crc_big_table, 256); 351cd882207SXin LI fprintf(out, 352cd882207SXin LI "};\n"); 353cd882207SXin LI 354cd882207SXin LI /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 355cd882207SXin LI fprintf(out, 356cd882207SXin LI "\n" 357cd882207SXin LI "#else /* W == 4 */\n" 358cd882207SXin LI "\n" 359cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n" 360cd882207SXin LI " "); 361cd882207SXin LI write_table32hi(out, crc_big_table, 256); 362cd882207SXin LI fprintf(out, 363cd882207SXin LI "};\n" 364cd882207SXin LI "\n" 365cd882207SXin LI "#endif\n"); 366cd882207SXin LI 367cd882207SXin LI /* write out braid tables for each value of N */ 368cd882207SXin LI for (n = 1; n <= 6; n++) { 369cd882207SXin LI fprintf(out, 370cd882207SXin LI "\n" 371cd882207SXin LI "#if N == %d\n", n); 372cd882207SXin LI 373cd882207SXin LI /* compute braid tables for this N and 64-bit word_t */ 374cd882207SXin LI braid(ltl, big, n, 8); 375cd882207SXin LI 376cd882207SXin LI /* write out braid tables for 64-bit z_word_t to crc32.h */ 377cd882207SXin LI fprintf(out, 378cd882207SXin LI "\n" 379cd882207SXin LI "#if W == 8\n" 380cd882207SXin LI "\n" 381cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 382cd882207SXin LI for (k = 0; k < 8; k++) { 383cd882207SXin LI fprintf(out, " {"); 384cd882207SXin LI write_table(out, ltl[k], 256); 385cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : ""); 386c9083b85SXin LI } 387cd882207SXin LI fprintf(out, 388cd882207SXin LI "};\n" 389cd882207SXin LI "\n" 390cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 391cd882207SXin LI for (k = 0; k < 8; k++) { 392cd882207SXin LI fprintf(out, " {"); 393cd882207SXin LI write_table64(out, big[k], 256); 394cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : ""); 395cd882207SXin LI } 396cd882207SXin LI fprintf(out, 397cd882207SXin LI "};\n"); 398cd882207SXin LI 399cd882207SXin LI /* compute braid tables for this N and 32-bit word_t */ 400cd882207SXin LI braid(ltl, big, n, 4); 401cd882207SXin LI 402cd882207SXin LI /* write out braid tables for 32-bit z_word_t to crc32.h */ 403cd882207SXin LI fprintf(out, 404cd882207SXin LI "\n" 405cd882207SXin LI "#else /* W == 4 */\n" 406cd882207SXin LI "\n" 407cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 408cd882207SXin LI for (k = 0; k < 4; k++) { 409cd882207SXin LI fprintf(out, " {"); 410cd882207SXin LI write_table(out, ltl[k], 256); 411cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : ""); 412cd882207SXin LI } 413cd882207SXin LI fprintf(out, 414cd882207SXin LI "};\n" 415cd882207SXin LI "\n" 416cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 417cd882207SXin LI for (k = 0; k < 4; k++) { 418cd882207SXin LI fprintf(out, " {"); 419cd882207SXin LI write_table32hi(out, big[k], 256); 420cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : ""); 421cd882207SXin LI } 422cd882207SXin LI fprintf(out, 423cd882207SXin LI "};\n" 424cd882207SXin LI "\n" 425cd882207SXin LI "#endif\n" 426cd882207SXin LI "\n" 427cd882207SXin LI "#endif\n"); 428cd882207SXin LI } 429cd882207SXin LI fprintf(out, 430cd882207SXin LI "\n" 431cd882207SXin LI "#endif\n"); 432cd882207SXin LI 433cd882207SXin LI /* write out zeros operator table to crc32.h */ 434cd882207SXin LI fprintf(out, 435cd882207SXin LI "\n" 436cd882207SXin LI "local const z_crc_t FAR x2n_table[] = {\n" 437cd882207SXin LI " "); 438cd882207SXin LI write_table(out, x2n_table, 32); 439cd882207SXin LI fprintf(out, 440cd882207SXin LI "};\n"); 441c9083b85SXin LI fclose(out); 442c9083b85SXin LI } 443c9083b85SXin LI #endif /* MAKECRCH */ 444c9083b85SXin LI } 445c9083b85SXin LI 446c9083b85SXin LI #ifdef MAKECRCH 447cd882207SXin LI 448cd882207SXin LI /* 449cd882207SXin LI Write the 32-bit values in table[0..k-1] to out, five per line in 450cd882207SXin LI hexadecimal separated by commas. 451cd882207SXin LI */ 452cd882207SXin LI local void write_table(out, table, k) 453c9083b85SXin LI FILE *out; 454c9083b85SXin LI const z_crc_t FAR *table; 455cd882207SXin LI int k; 456c9083b85SXin LI { 457c9083b85SXin LI int n; 458c9083b85SXin LI 459cd882207SXin LI for (n = 0; n < k; n++) 460cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 461c9083b85SXin LI (unsigned long)(table[n]), 462cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 463c9083b85SXin LI } 464cd882207SXin LI 465cd882207SXin LI /* 466cd882207SXin LI Write the high 32-bits of each value in table[0..k-1] to out, five per line 467cd882207SXin LI in hexadecimal separated by commas. 468cd882207SXin LI */ 469cd882207SXin LI local void write_table32hi(out, table, k) 470cd882207SXin LI FILE *out; 471cd882207SXin LI const z_word_t FAR *table; 472cd882207SXin LI int k; 473cd882207SXin LI { 474cd882207SXin LI int n; 475cd882207SXin LI 476cd882207SXin LI for (n = 0; n < k; n++) 477cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 478cd882207SXin LI (unsigned long)(table[n] >> 32), 479cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 480cd882207SXin LI } 481cd882207SXin LI 482cd882207SXin LI /* 483cd882207SXin LI Write the 64-bit values in table[0..k-1] to out, three per line in 484cd882207SXin LI hexadecimal separated by commas. This assumes that if there is a 64-bit 485cd882207SXin LI type, then there is also a long long integer type, and it is at least 64 486cd882207SXin LI bits. If not, then the type cast and format string can be adjusted 487cd882207SXin LI accordingly. 488cd882207SXin LI */ 489cd882207SXin LI local void write_table64(out, table, k) 490cd882207SXin LI FILE *out; 491cd882207SXin LI const z_word_t FAR *table; 492cd882207SXin LI int k; 493cd882207SXin LI { 494cd882207SXin LI int n; 495cd882207SXin LI 496cd882207SXin LI for (n = 0; n < k; n++) 497cd882207SXin LI fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 498cd882207SXin LI (unsigned long long)(table[n]), 499cd882207SXin LI n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 500cd882207SXin LI } 501cd882207SXin LI 502cd882207SXin LI /* Actually do the deed. */ 503cd882207SXin LI int main() 504cd882207SXin LI { 505cd882207SXin LI make_crc_table(); 506cd882207SXin LI return 0; 507cd882207SXin LI } 508cd882207SXin LI 509c9083b85SXin LI #endif /* MAKECRCH */ 510c9083b85SXin LI 511cd882207SXin LI #ifdef W 512cd882207SXin LI /* 513cd882207SXin LI Generate the little and big-endian braid tables for the given n and z_word_t 514cd882207SXin LI size w. Each array must have room for w blocks of 256 elements. 515cd882207SXin LI */ 516cd882207SXin LI local void braid(ltl, big, n, w) 517cd882207SXin LI z_crc_t ltl[][256]; 518cd882207SXin LI z_word_t big[][256]; 519cd882207SXin LI int n; 520cd882207SXin LI int w; 521cd882207SXin LI { 522cd882207SXin LI int k; 523cd882207SXin LI z_crc_t i, p, q; 524cd882207SXin LI for (k = 0; k < w; k++) { 525cd882207SXin LI p = x2nmodp((n * w + 3 - k) << 3, 0); 526cd882207SXin LI ltl[k][0] = 0; 527cd882207SXin LI big[w - 1 - k][0] = 0; 528cd882207SXin LI for (i = 1; i < 256; i++) { 529cd882207SXin LI ltl[k][i] = q = multmodp(i << 24, p); 530cd882207SXin LI big[w - 1 - k][i] = byte_swap(q); 531cd882207SXin LI } 532cd882207SXin LI } 533cd882207SXin LI } 534cd882207SXin LI #endif 535cd882207SXin LI 536c9083b85SXin LI #else /* !DYNAMIC_CRC_TABLE */ 537c9083b85SXin LI /* ======================================================================== 538cd882207SXin LI * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 539cd882207SXin LI * of x for combining CRC-32s, all made by make_crc_table(). 540c9083b85SXin LI */ 541c9083b85SXin LI #include "crc32.h" 542c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 543c9083b85SXin LI 544cd882207SXin LI /* ======================================================================== 545cd882207SXin LI * Routines used for CRC calculation. Some are also required for the table 546cd882207SXin LI * generation above. 547cd882207SXin LI */ 548cd882207SXin LI 549cd882207SXin LI /* 550cd882207SXin LI Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 551cd882207SXin LI reflected. For speed, this requires that a not be zero. 552cd882207SXin LI */ 553cd882207SXin LI local z_crc_t multmodp(a, b) 554cd882207SXin LI z_crc_t a; 555cd882207SXin LI z_crc_t b; 556cd882207SXin LI { 557cd882207SXin LI z_crc_t m, p; 558cd882207SXin LI 559cd882207SXin LI m = (z_crc_t)1 << 31; 560cd882207SXin LI p = 0; 561cd882207SXin LI for (;;) { 562cd882207SXin LI if (a & m) { 563cd882207SXin LI p ^= b; 564cd882207SXin LI if ((a & (m - 1)) == 0) 565cd882207SXin LI break; 566cd882207SXin LI } 567cd882207SXin LI m >>= 1; 568cd882207SXin LI b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 569cd882207SXin LI } 570cd882207SXin LI return p; 571cd882207SXin LI } 572cd882207SXin LI 573cd882207SXin LI /* 574cd882207SXin LI Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 575cd882207SXin LI initialized. 576cd882207SXin LI */ 577cd882207SXin LI local z_crc_t x2nmodp(n, k) 578cd882207SXin LI z_off64_t n; 579cd882207SXin LI unsigned k; 580cd882207SXin LI { 581cd882207SXin LI z_crc_t p; 582cd882207SXin LI 583cd882207SXin LI p = (z_crc_t)1 << 31; /* x^0 == 1 */ 584cd882207SXin LI while (n) { 585cd882207SXin LI if (n & 1) 586cd882207SXin LI p = multmodp(x2n_table[k & 31], p); 587cd882207SXin LI n >>= 1; 588cd882207SXin LI k++; 589cd882207SXin LI } 590cd882207SXin LI return p; 591cd882207SXin LI } 592cd882207SXin LI 593c9083b85SXin LI /* ========================================================================= 594cd882207SXin LI * This function can be used by asm versions of crc32(), and to force the 595cd882207SXin LI * generation of the CRC tables in a threaded application. 596c9083b85SXin LI */ 597c9083b85SXin LI const z_crc_t FAR * ZEXPORT get_crc_table() 598c9083b85SXin LI { 599c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 600cd882207SXin LI once(&made, make_crc_table); 601c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 602c9083b85SXin LI return (const z_crc_t FAR *)crc_table; 603c9083b85SXin LI } 604c9083b85SXin LI 605cd882207SXin LI /* ========================================================================= 606cd882207SXin LI * Use ARM machine instructions if available. This will compute the CRC about 607cd882207SXin LI * ten times faster than the braided calculation. This code does not check for 608cd882207SXin LI * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 609cd882207SXin LI * only be defined if the compilation specifies an ARM processor architecture 610cd882207SXin LI * that has the instructions. For example, compiling with -march=armv8.1-a or 611cd882207SXin LI * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 612cd882207SXin LI * instructions. 613cd882207SXin LI */ 614cd882207SXin LI #ifdef ARMCRC32 615cd882207SXin LI 616cd882207SXin LI /* 617cd882207SXin LI Constants empirically determined to maximize speed. These values are from 618cd882207SXin LI measurements on a Cortex-A57. Your mileage may vary. 619cd882207SXin LI */ 620cd882207SXin LI #define Z_BATCH 3990 /* number of words in a batch */ 621cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 622cd882207SXin LI #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 623cd882207SXin LI 624cd882207SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len) 625cd882207SXin LI unsigned long crc; 626cd882207SXin LI const unsigned char FAR *buf; 627cd882207SXin LI z_size_t len; 628cd882207SXin LI { 629cd882207SXin LI z_crc_t val; 630cd882207SXin LI z_word_t crc1, crc2; 631cd882207SXin LI const z_word_t *word; 632cd882207SXin LI z_word_t val0, val1, val2; 633cd882207SXin LI z_size_t last, last2, i; 634cd882207SXin LI z_size_t num; 635cd882207SXin LI 636cd882207SXin LI /* Return initial CRC, if requested. */ 637cd882207SXin LI if (buf == Z_NULL) return 0; 638cd882207SXin LI 639cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 640cd882207SXin LI once(&made, make_crc_table); 641cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 642cd882207SXin LI 643cd882207SXin LI /* Pre-condition the CRC */ 644c61bc111SXin LI crc = (~crc) & 0xffffffff; 645cd882207SXin LI 646cd882207SXin LI /* Compute the CRC up to a word boundary. */ 647cd882207SXin LI while (len && ((z_size_t)buf & 7) != 0) { 648cd882207SXin LI len--; 649cd882207SXin LI val = *buf++; 650cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 651cd882207SXin LI } 652cd882207SXin LI 653cd882207SXin LI /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 654cd882207SXin LI word = (z_word_t const *)buf; 655cd882207SXin LI num = len >> 3; 656cd882207SXin LI len &= 7; 657cd882207SXin LI 658cd882207SXin LI /* Do three interleaved CRCs to realize the throughput of one crc32x 659*e37bb444SXin LI instruction per cycle. Each CRC is calculated on Z_BATCH words. The 660*e37bb444SXin LI three CRCs are combined into a single CRC after each set of batches. */ 661cd882207SXin LI while (num >= 3 * Z_BATCH) { 662cd882207SXin LI crc1 = 0; 663cd882207SXin LI crc2 = 0; 664cd882207SXin LI for (i = 0; i < Z_BATCH; i++) { 665cd882207SXin LI val0 = word[i]; 666cd882207SXin LI val1 = word[i + Z_BATCH]; 667cd882207SXin LI val2 = word[i + 2 * Z_BATCH]; 668cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 669cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 670cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 671cd882207SXin LI } 672cd882207SXin LI word += 3 * Z_BATCH; 673cd882207SXin LI num -= 3 * Z_BATCH; 674cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 675cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 676cd882207SXin LI } 677cd882207SXin LI 678cd882207SXin LI /* Do one last smaller batch with the remaining words, if there are enough 679cd882207SXin LI to pay for the combination of CRCs. */ 680cd882207SXin LI last = num / 3; 681cd882207SXin LI if (last >= Z_BATCH_MIN) { 682cd882207SXin LI last2 = last << 1; 683cd882207SXin LI crc1 = 0; 684cd882207SXin LI crc2 = 0; 685cd882207SXin LI for (i = 0; i < last; i++) { 686cd882207SXin LI val0 = word[i]; 687cd882207SXin LI val1 = word[i + last]; 688cd882207SXin LI val2 = word[i + last2]; 689cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 690cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 691cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 692cd882207SXin LI } 693cd882207SXin LI word += 3 * last; 694cd882207SXin LI num -= 3 * last; 695cd882207SXin LI val = x2nmodp(last, 6); 696cd882207SXin LI crc = multmodp(val, crc) ^ crc1; 697cd882207SXin LI crc = multmodp(val, crc) ^ crc2; 698cd882207SXin LI } 699cd882207SXin LI 700cd882207SXin LI /* Compute the CRC on any remaining words. */ 701cd882207SXin LI for (i = 0; i < num; i++) { 702cd882207SXin LI val0 = word[i]; 703cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 704cd882207SXin LI } 705cd882207SXin LI word += num; 706cd882207SXin LI 707cd882207SXin LI /* Complete the CRC on any remaining bytes. */ 708cd882207SXin LI buf = (const unsigned char FAR *)word; 709cd882207SXin LI while (len) { 710cd882207SXin LI len--; 711cd882207SXin LI val = *buf++; 712cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 713cd882207SXin LI } 714cd882207SXin LI 715cd882207SXin LI /* Return the CRC, post-conditioned. */ 716cd882207SXin LI return crc ^ 0xffffffff; 717cd882207SXin LI } 718cd882207SXin LI 719cd882207SXin LI #else 720cd882207SXin LI 721cd882207SXin LI #ifdef W 722cd882207SXin LI 723cd882207SXin LI /* 724cd882207SXin LI Return the CRC of the W bytes in the word_t data, taking the 725cd882207SXin LI least-significant byte of the word as the first byte of data, without any pre 726cd882207SXin LI or post conditioning. This is used to combine the CRCs of each braid. 727cd882207SXin LI */ 7287550e242SJohn Baldwin local z_crc_t crc_word OF((z_word_t data)); 7297550e242SJohn Baldwin 730cd882207SXin LI local z_crc_t crc_word(data) 731cd882207SXin LI z_word_t data; 732cd882207SXin LI { 733cd882207SXin LI int k; 734cd882207SXin LI for (k = 0; k < W; k++) 735cd882207SXin LI data = (data >> 8) ^ crc_table[data & 0xff]; 736cd882207SXin LI return (z_crc_t)data; 737cd882207SXin LI } 738cd882207SXin LI 7397550e242SJohn Baldwin local z_word_t crc_word_big OF((z_word_t data)); 7407550e242SJohn Baldwin 741cd882207SXin LI local z_word_t crc_word_big(data) 742cd882207SXin LI z_word_t data; 743cd882207SXin LI { 744cd882207SXin LI int k; 745cd882207SXin LI for (k = 0; k < W; k++) 746cd882207SXin LI data = (data << 8) ^ 747cd882207SXin LI crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 748cd882207SXin LI return data; 749cd882207SXin LI } 750cd882207SXin LI 751cd882207SXin LI #endif 752c9083b85SXin LI 753c9083b85SXin LI /* ========================================================================= */ 754c9083b85SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len) 755c9083b85SXin LI unsigned long crc; 756c9083b85SXin LI const unsigned char FAR *buf; 757c9083b85SXin LI z_size_t len; 758c9083b85SXin LI { 759cd882207SXin LI /* Return initial CRC, if requested. */ 760cd882207SXin LI if (buf == Z_NULL) return 0; 761c9083b85SXin LI 762c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 763cd882207SXin LI once(&made, make_crc_table); 764c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 765c9083b85SXin LI 766cd882207SXin LI /* Pre-condition the CRC */ 767c61bc111SXin LI crc = (~crc) & 0xffffffff; 768c9083b85SXin LI 769cd882207SXin LI #ifdef W 770cd882207SXin LI 771cd882207SXin LI /* If provided enough bytes, do a braided CRC calculation. */ 772cd882207SXin LI if (len >= N * W + W - 1) { 773cd882207SXin LI z_size_t blks; 774cd882207SXin LI z_word_t const *words; 775cd882207SXin LI unsigned endian; 776cd882207SXin LI int k; 777cd882207SXin LI 778cd882207SXin LI /* Compute the CRC up to a z_word_t boundary. */ 779cd882207SXin LI while (len && ((z_size_t)buf & (W - 1)) != 0) { 780cd882207SXin LI len--; 781cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 782cd882207SXin LI } 783cd882207SXin LI 784cd882207SXin LI /* Compute the CRC on as many N z_word_t blocks as are available. */ 785cd882207SXin LI blks = len / (N * W); 786cd882207SXin LI len -= blks * N * W; 787cd882207SXin LI words = (z_word_t const *)buf; 788cd882207SXin LI 789cd882207SXin LI /* Do endian check at execution time instead of compile time, since ARM 790cd882207SXin LI processors can change the endianess at execution time. If the 791cd882207SXin LI compiler knows what the endianess will be, it can optimize out the 792cd882207SXin LI check and the unused branch. */ 793c9083b85SXin LI endian = 1; 794cd882207SXin LI if (*(unsigned char *)&endian) { 795cd882207SXin LI /* Little endian. */ 796cd882207SXin LI 797cd882207SXin LI z_crc_t crc0; 798cd882207SXin LI z_word_t word0; 799cd882207SXin LI #if N > 1 800cd882207SXin LI z_crc_t crc1; 801cd882207SXin LI z_word_t word1; 802cd882207SXin LI #if N > 2 803cd882207SXin LI z_crc_t crc2; 804cd882207SXin LI z_word_t word2; 805cd882207SXin LI #if N > 3 806cd882207SXin LI z_crc_t crc3; 807cd882207SXin LI z_word_t word3; 808cd882207SXin LI #if N > 4 809cd882207SXin LI z_crc_t crc4; 810cd882207SXin LI z_word_t word4; 811cd882207SXin LI #if N > 5 812cd882207SXin LI z_crc_t crc5; 813cd882207SXin LI z_word_t word5; 814cd882207SXin LI #endif 815cd882207SXin LI #endif 816cd882207SXin LI #endif 817cd882207SXin LI #endif 818cd882207SXin LI #endif 819cd882207SXin LI 820cd882207SXin LI /* Initialize the CRC for each braid. */ 821cd882207SXin LI crc0 = crc; 822cd882207SXin LI #if N > 1 823cd882207SXin LI crc1 = 0; 824cd882207SXin LI #if N > 2 825cd882207SXin LI crc2 = 0; 826cd882207SXin LI #if N > 3 827cd882207SXin LI crc3 = 0; 828cd882207SXin LI #if N > 4 829cd882207SXin LI crc4 = 0; 830cd882207SXin LI #if N > 5 831cd882207SXin LI crc5 = 0; 832cd882207SXin LI #endif 833cd882207SXin LI #endif 834cd882207SXin LI #endif 835cd882207SXin LI #endif 836cd882207SXin LI #endif 837cd882207SXin LI 838cd882207SXin LI /* 839cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid 840cd882207SXin LI independently. 841cd882207SXin LI */ 842cd882207SXin LI while (--blks) { 843cd882207SXin LI /* Load the word for each braid into registers. */ 844cd882207SXin LI word0 = crc0 ^ words[0]; 845cd882207SXin LI #if N > 1 846cd882207SXin LI word1 = crc1 ^ words[1]; 847cd882207SXin LI #if N > 2 848cd882207SXin LI word2 = crc2 ^ words[2]; 849cd882207SXin LI #if N > 3 850cd882207SXin LI word3 = crc3 ^ words[3]; 851cd882207SXin LI #if N > 4 852cd882207SXin LI word4 = crc4 ^ words[4]; 853cd882207SXin LI #if N > 5 854cd882207SXin LI word5 = crc5 ^ words[5]; 855cd882207SXin LI #endif 856cd882207SXin LI #endif 857cd882207SXin LI #endif 858cd882207SXin LI #endif 859cd882207SXin LI #endif 860cd882207SXin LI words += N; 861cd882207SXin LI 862cd882207SXin LI /* Compute and update the CRC for each word. The loop should 863cd882207SXin LI get unrolled. */ 864cd882207SXin LI crc0 = crc_braid_table[0][word0 & 0xff]; 865cd882207SXin LI #if N > 1 866cd882207SXin LI crc1 = crc_braid_table[0][word1 & 0xff]; 867cd882207SXin LI #if N > 2 868cd882207SXin LI crc2 = crc_braid_table[0][word2 & 0xff]; 869cd882207SXin LI #if N > 3 870cd882207SXin LI crc3 = crc_braid_table[0][word3 & 0xff]; 871cd882207SXin LI #if N > 4 872cd882207SXin LI crc4 = crc_braid_table[0][word4 & 0xff]; 873cd882207SXin LI #if N > 5 874cd882207SXin LI crc5 = crc_braid_table[0][word5 & 0xff]; 875cd882207SXin LI #endif 876cd882207SXin LI #endif 877cd882207SXin LI #endif 878cd882207SXin LI #endif 879cd882207SXin LI #endif 880cd882207SXin LI for (k = 1; k < W; k++) { 881cd882207SXin LI crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 882cd882207SXin LI #if N > 1 883cd882207SXin LI crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 884cd882207SXin LI #if N > 2 885cd882207SXin LI crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 886cd882207SXin LI #if N > 3 887cd882207SXin LI crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 888cd882207SXin LI #if N > 4 889cd882207SXin LI crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 890cd882207SXin LI #if N > 5 891cd882207SXin LI crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 892cd882207SXin LI #endif 893cd882207SXin LI #endif 894cd882207SXin LI #endif 895cd882207SXin LI #endif 896cd882207SXin LI #endif 897c9083b85SXin LI } 898cd882207SXin LI } 899cd882207SXin LI 900cd882207SXin LI /* 901cd882207SXin LI Process the last block, combining the CRCs of the N braids at the 902cd882207SXin LI same time. 903cd882207SXin LI */ 904cd882207SXin LI crc = crc_word(crc0 ^ words[0]); 905cd882207SXin LI #if N > 1 906cd882207SXin LI crc = crc_word(crc1 ^ words[1] ^ crc); 907cd882207SXin LI #if N > 2 908cd882207SXin LI crc = crc_word(crc2 ^ words[2] ^ crc); 909cd882207SXin LI #if N > 3 910cd882207SXin LI crc = crc_word(crc3 ^ words[3] ^ crc); 911cd882207SXin LI #if N > 4 912cd882207SXin LI crc = crc_word(crc4 ^ words[4] ^ crc); 913cd882207SXin LI #if N > 5 914cd882207SXin LI crc = crc_word(crc5 ^ words[5] ^ crc); 915cd882207SXin LI #endif 916cd882207SXin LI #endif 917cd882207SXin LI #endif 918cd882207SXin LI #endif 919cd882207SXin LI #endif 920cd882207SXin LI words += N; 921cd882207SXin LI } 922cd882207SXin LI else { 923cd882207SXin LI /* Big endian. */ 924cd882207SXin LI 925cd882207SXin LI z_word_t crc0, word0, comb; 926cd882207SXin LI #if N > 1 927cd882207SXin LI z_word_t crc1, word1; 928cd882207SXin LI #if N > 2 929cd882207SXin LI z_word_t crc2, word2; 930cd882207SXin LI #if N > 3 931cd882207SXin LI z_word_t crc3, word3; 932cd882207SXin LI #if N > 4 933cd882207SXin LI z_word_t crc4, word4; 934cd882207SXin LI #if N > 5 935cd882207SXin LI z_word_t crc5, word5; 936cd882207SXin LI #endif 937cd882207SXin LI #endif 938cd882207SXin LI #endif 939cd882207SXin LI #endif 940cd882207SXin LI #endif 941cd882207SXin LI 942cd882207SXin LI /* Initialize the CRC for each braid. */ 943cd882207SXin LI crc0 = byte_swap(crc); 944cd882207SXin LI #if N > 1 945cd882207SXin LI crc1 = 0; 946cd882207SXin LI #if N > 2 947cd882207SXin LI crc2 = 0; 948cd882207SXin LI #if N > 3 949cd882207SXin LI crc3 = 0; 950cd882207SXin LI #if N > 4 951cd882207SXin LI crc4 = 0; 952cd882207SXin LI #if N > 5 953cd882207SXin LI crc5 = 0; 954cd882207SXin LI #endif 955cd882207SXin LI #endif 956cd882207SXin LI #endif 957cd882207SXin LI #endif 958cd882207SXin LI #endif 959cd882207SXin LI 960cd882207SXin LI /* 961cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid 962cd882207SXin LI independently. 963cd882207SXin LI */ 964cd882207SXin LI while (--blks) { 965cd882207SXin LI /* Load the word for each braid into registers. */ 966cd882207SXin LI word0 = crc0 ^ words[0]; 967cd882207SXin LI #if N > 1 968cd882207SXin LI word1 = crc1 ^ words[1]; 969cd882207SXin LI #if N > 2 970cd882207SXin LI word2 = crc2 ^ words[2]; 971cd882207SXin LI #if N > 3 972cd882207SXin LI word3 = crc3 ^ words[3]; 973cd882207SXin LI #if N > 4 974cd882207SXin LI word4 = crc4 ^ words[4]; 975cd882207SXin LI #if N > 5 976cd882207SXin LI word5 = crc5 ^ words[5]; 977cd882207SXin LI #endif 978cd882207SXin LI #endif 979cd882207SXin LI #endif 980cd882207SXin LI #endif 981cd882207SXin LI #endif 982cd882207SXin LI words += N; 983cd882207SXin LI 984cd882207SXin LI /* Compute and update the CRC for each word. The loop should 985cd882207SXin LI get unrolled. */ 986cd882207SXin LI crc0 = crc_braid_big_table[0][word0 & 0xff]; 987cd882207SXin LI #if N > 1 988cd882207SXin LI crc1 = crc_braid_big_table[0][word1 & 0xff]; 989cd882207SXin LI #if N > 2 990cd882207SXin LI crc2 = crc_braid_big_table[0][word2 & 0xff]; 991cd882207SXin LI #if N > 3 992cd882207SXin LI crc3 = crc_braid_big_table[0][word3 & 0xff]; 993cd882207SXin LI #if N > 4 994cd882207SXin LI crc4 = crc_braid_big_table[0][word4 & 0xff]; 995cd882207SXin LI #if N > 5 996cd882207SXin LI crc5 = crc_braid_big_table[0][word5 & 0xff]; 997cd882207SXin LI #endif 998cd882207SXin LI #endif 999cd882207SXin LI #endif 1000cd882207SXin LI #endif 1001cd882207SXin LI #endif 1002cd882207SXin LI for (k = 1; k < W; k++) { 1003cd882207SXin LI crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 1004cd882207SXin LI #if N > 1 1005cd882207SXin LI crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 1006cd882207SXin LI #if N > 2 1007cd882207SXin LI crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 1008cd882207SXin LI #if N > 3 1009cd882207SXin LI crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 1010cd882207SXin LI #if N > 4 1011cd882207SXin LI crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 1012cd882207SXin LI #if N > 5 1013cd882207SXin LI crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 1014cd882207SXin LI #endif 1015cd882207SXin LI #endif 1016cd882207SXin LI #endif 1017cd882207SXin LI #endif 1018cd882207SXin LI #endif 1019cd882207SXin LI } 1020cd882207SXin LI } 1021cd882207SXin LI 1022cd882207SXin LI /* 1023cd882207SXin LI Process the last block, combining the CRCs of the N braids at the 1024cd882207SXin LI same time. 1025cd882207SXin LI */ 1026cd882207SXin LI comb = crc_word_big(crc0 ^ words[0]); 1027cd882207SXin LI #if N > 1 1028cd882207SXin LI comb = crc_word_big(crc1 ^ words[1] ^ comb); 1029cd882207SXin LI #if N > 2 1030cd882207SXin LI comb = crc_word_big(crc2 ^ words[2] ^ comb); 1031cd882207SXin LI #if N > 3 1032cd882207SXin LI comb = crc_word_big(crc3 ^ words[3] ^ comb); 1033cd882207SXin LI #if N > 4 1034cd882207SXin LI comb = crc_word_big(crc4 ^ words[4] ^ comb); 1035cd882207SXin LI #if N > 5 1036cd882207SXin LI comb = crc_word_big(crc5 ^ words[5] ^ comb); 1037cd882207SXin LI #endif 1038cd882207SXin LI #endif 1039cd882207SXin LI #endif 1040cd882207SXin LI #endif 1041cd882207SXin LI #endif 1042cd882207SXin LI words += N; 1043cd882207SXin LI crc = byte_swap(comb); 1044cd882207SXin LI } 1045cd882207SXin LI 1046cd882207SXin LI /* 1047cd882207SXin LI Update the pointer to the remaining bytes to process. 1048cd882207SXin LI */ 1049cd882207SXin LI buf = (unsigned char const *)words; 1050cd882207SXin LI } 1051cd882207SXin LI 1052cd882207SXin LI #endif /* W */ 1053cd882207SXin LI 1054cd882207SXin LI /* Complete the computation of the CRC on any remaining bytes. */ 1055c9083b85SXin LI while (len >= 8) { 1056c9083b85SXin LI len -= 8; 1057cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1058cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1059cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1060cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1061cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1062cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1063cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1064cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1065c9083b85SXin LI } 1066cd882207SXin LI while (len) { 1067cd882207SXin LI len--; 1068cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1069c9083b85SXin LI } 1070c9083b85SXin LI 1071cd882207SXin LI /* Return the CRC, post-conditioned. */ 1072cd882207SXin LI return crc ^ 0xffffffff; 1073cd882207SXin LI } 1074cd882207SXin LI 1075cd882207SXin LI #endif 1076cd882207SXin LI 1077c9083b85SXin LI /* ========================================================================= */ 1078c9083b85SXin LI unsigned long ZEXPORT crc32(crc, buf, len) 1079c9083b85SXin LI unsigned long crc; 1080c9083b85SXin LI const unsigned char FAR *buf; 1081c9083b85SXin LI uInt len; 1082c9083b85SXin LI { 1083c9083b85SXin LI return crc32_z(crc, buf, len); 1084c9083b85SXin LI } 1085c9083b85SXin LI 1086c9083b85SXin LI /* ========================================================================= */ 1087cd882207SXin LI uLong ZEXPORT crc32_combine64(crc1, crc2, len2) 1088c9083b85SXin LI uLong crc1; 1089c9083b85SXin LI uLong crc2; 1090c9083b85SXin LI z_off64_t len2; 1091c9083b85SXin LI { 1092cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 1093cd882207SXin LI once(&made, make_crc_table); 1094cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 1095c61bc111SXin LI return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1096c9083b85SXin LI } 1097c9083b85SXin LI 1098c9083b85SXin LI /* ========================================================================= */ 1099c9083b85SXin LI uLong ZEXPORT crc32_combine(crc1, crc2, len2) 1100c9083b85SXin LI uLong crc1; 1101c9083b85SXin LI uLong crc2; 1102c9083b85SXin LI z_off_t len2; 1103c9083b85SXin LI { 1104*e37bb444SXin LI return crc32_combine64(crc1, crc2, (z_off64_t)len2); 1105c9083b85SXin LI } 1106c9083b85SXin LI 1107cd882207SXin LI /* ========================================================================= */ 1108cd882207SXin LI uLong ZEXPORT crc32_combine_gen64(len2) 1109c9083b85SXin LI z_off64_t len2; 1110c9083b85SXin LI { 1111cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 1112cd882207SXin LI once(&made, make_crc_table); 1113cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 1114cd882207SXin LI return x2nmodp(len2, 3); 1115cd882207SXin LI } 1116cd882207SXin LI 1117cd882207SXin LI /* ========================================================================= */ 1118cd882207SXin LI uLong ZEXPORT crc32_combine_gen(len2) 1119cd882207SXin LI z_off_t len2; 1120cd882207SXin LI { 1121*e37bb444SXin LI return crc32_combine_gen64((z_off64_t)len2); 1122cd882207SXin LI } 1123cd882207SXin LI 1124cd882207SXin LI /* ========================================================================= */ 1125*e37bb444SXin LI uLong ZEXPORT crc32_combine_op(crc1, crc2, op) 1126cd882207SXin LI uLong crc1; 1127cd882207SXin LI uLong crc2; 1128cd882207SXin LI uLong op; 1129cd882207SXin LI { 1130c61bc111SXin LI return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1131c9083b85SXin LI } 1132