1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream 2cd882207SXin LI * Copyright (C) 1995-2022 Mark Adler 3c9083b85SXin LI * For conditions of distribution and use, see copyright notice in zlib.h 4c9083b85SXin LI * 5cd882207SXin LI * This interleaved implementation of a CRC makes use of pipelined multiple 6cd882207SXin LI * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7cd882207SXin LI * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8c9083b85SXin LI */ 9c9083b85SXin LI 10c9083b85SXin LI /* @(#) $Id$ */ 11c9083b85SXin LI 12c9083b85SXin LI /* 13c9083b85SXin LI Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 14c9083b85SXin LI protection on the static variables used to control the first-use generation 15c9083b85SXin LI of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 16c9083b85SXin LI first call get_crc_table() to initialize the tables before allowing more than 17c9083b85SXin LI one thread to use crc32(). 18c9083b85SXin LI 19cd882207SXin LI MAKECRCH can be #defined to write out crc32.h. A main() routine is also 20cd882207SXin LI produced, so that this one source file can be compiled to an executable. 21c9083b85SXin LI */ 22c9083b85SXin LI 23c9083b85SXin LI #ifdef MAKECRCH 24c9083b85SXin LI # include <stdio.h> 25c9083b85SXin LI # ifndef DYNAMIC_CRC_TABLE 26c9083b85SXin LI # define DYNAMIC_CRC_TABLE 27c9083b85SXin LI # endif /* !DYNAMIC_CRC_TABLE */ 28c9083b85SXin LI #endif /* MAKECRCH */ 29c9083b85SXin LI 30cd882207SXin LI #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 31c9083b85SXin LI 32cd882207SXin LI /* 33cd882207SXin LI A CRC of a message is computed on N braids of words in the message, where 34cd882207SXin LI each word consists of W bytes (4 or 8). If N is 3, for example, then three 35cd882207SXin LI running sparse CRCs are calculated respectively on each braid, at these 36cd882207SXin LI indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 37cd882207SXin LI This is done starting at a word boundary, and continues until as many blocks 38cd882207SXin LI of N * W bytes as are available have been processed. The results are combined 39cd882207SXin LI into a single CRC at the end. For this code, N must be in the range 1..6 and 40cd882207SXin LI W must be 4 or 8. The upper limit on N can be increased if desired by adding 41cd882207SXin LI more #if blocks, extending the patterns apparent in the code. In addition, 42cd882207SXin LI crc32.h would need to be regenerated, if the maximum N value is increased. 43cd882207SXin LI 44cd882207SXin LI N and W are chosen empirically by benchmarking the execution time on a given 45cd882207SXin LI processor. The choices for N and W below were based on testing on Intel Kaby 46cd882207SXin LI Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 47cd882207SXin LI Octeon II processors. The Intel, AMD, and ARM processors were all fastest 48cd882207SXin LI with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 49cd882207SXin LI They were all tested with either gcc or clang, all using the -O3 optimization 50cd882207SXin LI level. Your mileage may vary. 51cd882207SXin LI */ 52cd882207SXin LI 53cd882207SXin LI /* Define N */ 54cd882207SXin LI #ifdef Z_TESTN 55cd882207SXin LI # define N Z_TESTN 56c9083b85SXin LI #else 57cd882207SXin LI # define N 5 58cd882207SXin LI #endif 59cd882207SXin LI #if N < 1 || N > 6 60cd882207SXin LI # error N must be in 1..6 61cd882207SXin LI #endif 62c9083b85SXin LI 63cd882207SXin LI /* 64cd882207SXin LI z_crc_t must be at least 32 bits. z_word_t must be at least as long as 65cd882207SXin LI z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 66cd882207SXin LI that bytes are eight bits. 67cd882207SXin LI */ 68c9083b85SXin LI 69cd882207SXin LI /* 70cd882207SXin LI Define W and the associated z_word_t type. If W is not defined, then a 71cd882207SXin LI braided calculation is not used, and the associated tables and code are not 72cd882207SXin LI compiled. 73cd882207SXin LI */ 74cd882207SXin LI #ifdef Z_TESTW 75cd882207SXin LI # if Z_TESTW-1 != -1 76cd882207SXin LI # define W Z_TESTW 77cd882207SXin LI # endif 78cd882207SXin LI #else 79cd882207SXin LI # ifdef MAKECRCH 80cd882207SXin LI # define W 8 /* required for MAKECRCH */ 81cd882207SXin LI # else 82cd882207SXin LI # if defined(__x86_64__) || defined(__aarch64__) 83cd882207SXin LI # define W 8 84cd882207SXin LI # else 85cd882207SXin LI # define W 4 86cd882207SXin LI # endif 87cd882207SXin LI # endif 88cd882207SXin LI #endif 89cd882207SXin LI #ifdef W 90cd882207SXin LI # if W == 8 && defined(Z_U8) 91cd882207SXin LI typedef Z_U8 z_word_t; 92cd882207SXin LI # elif defined(Z_U4) 93cd882207SXin LI # undef W 94cd882207SXin LI # define W 4 95cd882207SXin LI typedef Z_U4 z_word_t; 96cd882207SXin LI # else 97cd882207SXin LI # undef W 98cd882207SXin LI # endif 99cd882207SXin LI #endif 100cd882207SXin LI 101cd882207SXin LI /* Local functions. */ 102cd882207SXin LI local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); 103cd882207SXin LI local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); 104cd882207SXin LI 105cd882207SXin LI /* If available, use the ARM processor CRC32 instruction. */ 106cd882207SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 107cd882207SXin LI # define ARMCRC32 108cd882207SXin LI #endif 109cd882207SXin LI 110cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 111cd882207SXin LI /* 112cd882207SXin LI Swap the bytes in a z_word_t to convert between little and big endian. Any 113cd882207SXin LI self-respecting compiler will optimize this to a single machine byte-swap 114cd882207SXin LI instruction, if one is available. This assumes that word_t is either 32 bits 115cd882207SXin LI or 64 bits. 116cd882207SXin LI */ 117cd882207SXin LI local z_word_t byte_swap(word) 118cd882207SXin LI z_word_t word; 119cd882207SXin LI { 120cd882207SXin LI # if W == 8 121cd882207SXin LI return 122cd882207SXin LI (word & 0xff00000000000000) >> 56 | 123cd882207SXin LI (word & 0xff000000000000) >> 40 | 124cd882207SXin LI (word & 0xff0000000000) >> 24 | 125cd882207SXin LI (word & 0xff00000000) >> 8 | 126cd882207SXin LI (word & 0xff000000) << 8 | 127cd882207SXin LI (word & 0xff0000) << 24 | 128cd882207SXin LI (word & 0xff00) << 40 | 129cd882207SXin LI (word & 0xff) << 56; 130cd882207SXin LI # else /* W == 4 */ 131cd882207SXin LI return 132cd882207SXin LI (word & 0xff000000) >> 24 | 133cd882207SXin LI (word & 0xff0000) >> 8 | 134cd882207SXin LI (word & 0xff00) << 8 | 135cd882207SXin LI (word & 0xff) << 24; 136cd882207SXin LI # endif 137cd882207SXin LI } 138cd882207SXin LI #endif 139cd882207SXin LI 140cd882207SXin LI /* CRC polynomial. */ 141cd882207SXin LI #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 142c9083b85SXin LI 143c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 144c9083b85SXin LI 145cd882207SXin LI local z_crc_t FAR crc_table[256]; 146cd882207SXin LI local z_crc_t FAR x2n_table[32]; 147c9083b85SXin LI local void make_crc_table OF((void)); 148cd882207SXin LI #ifdef W 149cd882207SXin LI local z_word_t FAR crc_big_table[256]; 150cd882207SXin LI local z_crc_t FAR crc_braid_table[W][256]; 151cd882207SXin LI local z_word_t FAR crc_braid_big_table[W][256]; 152cd882207SXin LI local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); 153cd882207SXin LI #endif 154c9083b85SXin LI #ifdef MAKECRCH 155cd882207SXin LI local void write_table OF((FILE *, const z_crc_t FAR *, int)); 156cd882207SXin LI local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); 157cd882207SXin LI local void write_table64 OF((FILE *, const z_word_t FAR *, int)); 158c9083b85SXin LI #endif /* MAKECRCH */ 159cd882207SXin LI 160cd882207SXin LI /* 161cd882207SXin LI Define a once() function depending on the availability of atomics. If this is 162cd882207SXin LI compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 163cd882207SXin LI multiple threads, and if atomics are not available, then get_crc_table() must 164cd882207SXin LI be called to initialize the tables and must return before any threads are 165cd882207SXin LI allowed to compute or combine CRCs. 166cd882207SXin LI */ 167cd882207SXin LI 168cd882207SXin LI /* Definition of once functionality. */ 169cd882207SXin LI typedef struct once_s once_t; 170cd882207SXin LI local void once OF((once_t *, void (*)(void))); 171cd882207SXin LI 172cd882207SXin LI /* Check for the availability of atomics. */ 173cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 174cd882207SXin LI !defined(__STDC_NO_ATOMICS__) 175cd882207SXin LI 176cd882207SXin LI #include <stdatomic.h> 177cd882207SXin LI 178cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */ 179cd882207SXin LI struct once_s { 180cd882207SXin LI atomic_flag begun; 181cd882207SXin LI atomic_int done; 182cd882207SXin LI }; 183cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 184cd882207SXin LI 185cd882207SXin LI /* 186cd882207SXin LI Run the provided init() function exactly once, even if multiple threads 187cd882207SXin LI invoke once() at the same time. The state must be a once_t initialized with 188cd882207SXin LI ONCE_INIT. 189cd882207SXin LI */ 190cd882207SXin LI local void once(state, init) 191cd882207SXin LI once_t *state; 192cd882207SXin LI void (*init)(void); 193cd882207SXin LI { 194cd882207SXin LI if (!atomic_load(&state->done)) { 195cd882207SXin LI if (atomic_flag_test_and_set(&state->begun)) 196cd882207SXin LI while (!atomic_load(&state->done)) 197cd882207SXin LI ; 198cd882207SXin LI else { 199cd882207SXin LI init(); 200cd882207SXin LI atomic_store(&state->done, 1); 201cd882207SXin LI } 202cd882207SXin LI } 203cd882207SXin LI } 204cd882207SXin LI 205cd882207SXin LI #else /* no atomics */ 206cd882207SXin LI 207cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */ 208cd882207SXin LI struct once_s { 209cd882207SXin LI volatile int begun; 210cd882207SXin LI volatile int done; 211cd882207SXin LI }; 212cd882207SXin LI #define ONCE_INIT {0, 0} 213cd882207SXin LI 214cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of 215cd882207SXin LI vulnerability. */ 216cd882207SXin LI local int test_and_set OF((int volatile *)); 217cd882207SXin LI local int test_and_set(flag) 218cd882207SXin LI int volatile *flag; 219cd882207SXin LI { 220cd882207SXin LI int was; 221cd882207SXin LI 222cd882207SXin LI was = *flag; 223cd882207SXin LI *flag = 1; 224cd882207SXin LI return was; 225cd882207SXin LI } 226cd882207SXin LI 227cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */ 228cd882207SXin LI local void once(state, init) 229cd882207SXin LI once_t *state; 230cd882207SXin LI void (*init)(void); 231cd882207SXin LI { 232cd882207SXin LI if (!state->done) { 233cd882207SXin LI if (test_and_set(&state->begun)) 234cd882207SXin LI while (!state->done) 235cd882207SXin LI ; 236cd882207SXin LI else { 237cd882207SXin LI init(); 238cd882207SXin LI state->done = 1; 239cd882207SXin LI } 240cd882207SXin LI } 241cd882207SXin LI } 242cd882207SXin LI 243cd882207SXin LI #endif 244cd882207SXin LI 245cd882207SXin LI /* State for once(). */ 246cd882207SXin LI local once_t made = ONCE_INIT; 247cd882207SXin LI 248c9083b85SXin LI /* 249c9083b85SXin LI Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 250c9083b85SXin LI x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 251c9083b85SXin LI 252c9083b85SXin LI Polynomials over GF(2) are represented in binary, one bit per coefficient, 253c9083b85SXin LI with the lowest powers in the most significant bit. Then adding polynomials 254c9083b85SXin LI is just exclusive-or, and multiplying a polynomial by x is a right shift by 255c9083b85SXin LI one. If we call the above polynomial p, and represent a byte as the 256c9083b85SXin LI polynomial q, also with the lowest power in the most significant bit (so the 257cd882207SXin LI byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 258c9083b85SXin LI where a mod b means the remainder after dividing a by b. 259c9083b85SXin LI 260c9083b85SXin LI This calculation is done using the shift-register method of multiplying and 261c9083b85SXin LI taking the remainder. The register is initialized to zero, and for each 262c9083b85SXin LI incoming bit, x^32 is added mod p to the register if the bit is a one (where 263cd882207SXin LI x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 264cd882207SXin LI (which is shifting right by one and adding x^32 mod p if the bit shifted out 265cd882207SXin LI is a one). We start with the highest power (least significant bit) of q and 266cd882207SXin LI repeat for all eight bits of q. 267c9083b85SXin LI 268cd882207SXin LI The table is simply the CRC of all possible eight bit values. This is all the 269cd882207SXin LI information needed to generate CRCs on data a byte at a time for all 270cd882207SXin LI combinations of CRC register values and incoming bytes. 271c9083b85SXin LI */ 272cd882207SXin LI 273c9083b85SXin LI local void make_crc_table() 274c9083b85SXin LI { 275cd882207SXin LI unsigned i, j, n; 276cd882207SXin LI z_crc_t p; 277c9083b85SXin LI 278cd882207SXin LI /* initialize the CRC of bytes tables */ 279cd882207SXin LI for (i = 0; i < 256; i++) { 280cd882207SXin LI p = i; 281cd882207SXin LI for (j = 0; j < 8; j++) 282cd882207SXin LI p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 283cd882207SXin LI crc_table[i] = p; 284cd882207SXin LI #ifdef W 285cd882207SXin LI crc_big_table[i] = byte_swap(p); 286cd882207SXin LI #endif 287c9083b85SXin LI } 288c9083b85SXin LI 289cd882207SXin LI /* initialize the x^2^n mod p(x) table */ 290cd882207SXin LI p = (z_crc_t)1 << 30; /* x^1 */ 291cd882207SXin LI x2n_table[0] = p; 292cd882207SXin LI for (n = 1; n < 32; n++) 293cd882207SXin LI x2n_table[n] = p = multmodp(p, p); 294c9083b85SXin LI 295cd882207SXin LI #ifdef W 296cd882207SXin LI /* initialize the braiding tables -- needs x2n_table[] */ 297cd882207SXin LI braid(crc_braid_table, crc_braid_big_table, N, W); 298cd882207SXin LI #endif 299c9083b85SXin LI 300c9083b85SXin LI #ifdef MAKECRCH 301c9083b85SXin LI { 302cd882207SXin LI /* 303cd882207SXin LI The crc32.h header file contains tables for both 32-bit and 64-bit 304cd882207SXin LI z_word_t's, and so requires a 64-bit type be available. In that case, 305cd882207SXin LI z_word_t must be defined to be 64-bits. This code then also generates 306cd882207SXin LI and writes out the tables for the case that z_word_t is 32 bits. 307cd882207SXin LI */ 308cd882207SXin LI #if !defined(W) || W != 8 309cd882207SXin LI # error Need a 64-bit integer type in order to generate crc32.h. 310cd882207SXin LI #endif 311c9083b85SXin LI FILE *out; 312cd882207SXin LI int k, n; 313cd882207SXin LI z_crc_t ltl[8][256]; 314cd882207SXin LI z_word_t big[8][256]; 315c9083b85SXin LI 316c9083b85SXin LI out = fopen("crc32.h", "w"); 317c9083b85SXin LI if (out == NULL) return; 318cd882207SXin LI 319cd882207SXin LI /* write out little-endian CRC table to crc32.h */ 320cd882207SXin LI fprintf(out, 321cd882207SXin LI "/* crc32.h -- tables for rapid CRC calculation\n" 322cd882207SXin LI " * Generated automatically by crc32.c\n */\n" 323cd882207SXin LI "\n" 324cd882207SXin LI "local const z_crc_t FAR crc_table[] = {\n" 325cd882207SXin LI " "); 326cd882207SXin LI write_table(out, crc_table, 256); 327cd882207SXin LI fprintf(out, 328cd882207SXin LI "};\n"); 329cd882207SXin LI 330cd882207SXin LI /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 331cd882207SXin LI fprintf(out, 332cd882207SXin LI "\n" 333cd882207SXin LI "#ifdef W\n" 334cd882207SXin LI "\n" 335cd882207SXin LI "#if W == 8\n" 336cd882207SXin LI "\n" 337cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n" 338cd882207SXin LI " "); 339cd882207SXin LI write_table64(out, crc_big_table, 256); 340cd882207SXin LI fprintf(out, 341cd882207SXin LI "};\n"); 342cd882207SXin LI 343cd882207SXin LI /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 344cd882207SXin LI fprintf(out, 345cd882207SXin LI "\n" 346cd882207SXin LI "#else /* W == 4 */\n" 347cd882207SXin LI "\n" 348cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n" 349cd882207SXin LI " "); 350cd882207SXin LI write_table32hi(out, crc_big_table, 256); 351cd882207SXin LI fprintf(out, 352cd882207SXin LI "};\n" 353cd882207SXin LI "\n" 354cd882207SXin LI "#endif\n"); 355cd882207SXin LI 356cd882207SXin LI /* write out braid tables for each value of N */ 357cd882207SXin LI for (n = 1; n <= 6; n++) { 358cd882207SXin LI fprintf(out, 359cd882207SXin LI "\n" 360cd882207SXin LI "#if N == %d\n", n); 361cd882207SXin LI 362cd882207SXin LI /* compute braid tables for this N and 64-bit word_t */ 363cd882207SXin LI braid(ltl, big, n, 8); 364cd882207SXin LI 365cd882207SXin LI /* write out braid tables for 64-bit z_word_t to crc32.h */ 366cd882207SXin LI fprintf(out, 367cd882207SXin LI "\n" 368cd882207SXin LI "#if W == 8\n" 369cd882207SXin LI "\n" 370cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 371cd882207SXin LI for (k = 0; k < 8; k++) { 372cd882207SXin LI fprintf(out, " {"); 373cd882207SXin LI write_table(out, ltl[k], 256); 374cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : ""); 375c9083b85SXin LI } 376cd882207SXin LI fprintf(out, 377cd882207SXin LI "};\n" 378cd882207SXin LI "\n" 379cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 380cd882207SXin LI for (k = 0; k < 8; k++) { 381cd882207SXin LI fprintf(out, " {"); 382cd882207SXin LI write_table64(out, big[k], 256); 383cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : ""); 384cd882207SXin LI } 385cd882207SXin LI fprintf(out, 386cd882207SXin LI "};\n"); 387cd882207SXin LI 388cd882207SXin LI /* compute braid tables for this N and 32-bit word_t */ 389cd882207SXin LI braid(ltl, big, n, 4); 390cd882207SXin LI 391cd882207SXin LI /* write out braid tables for 32-bit z_word_t to crc32.h */ 392cd882207SXin LI fprintf(out, 393cd882207SXin LI "\n" 394cd882207SXin LI "#else /* W == 4 */\n" 395cd882207SXin LI "\n" 396cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 397cd882207SXin LI for (k = 0; k < 4; k++) { 398cd882207SXin LI fprintf(out, " {"); 399cd882207SXin LI write_table(out, ltl[k], 256); 400cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : ""); 401cd882207SXin LI } 402cd882207SXin LI fprintf(out, 403cd882207SXin LI "};\n" 404cd882207SXin LI "\n" 405cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 406cd882207SXin LI for (k = 0; k < 4; k++) { 407cd882207SXin LI fprintf(out, " {"); 408cd882207SXin LI write_table32hi(out, big[k], 256); 409cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : ""); 410cd882207SXin LI } 411cd882207SXin LI fprintf(out, 412cd882207SXin LI "};\n" 413cd882207SXin LI "\n" 414cd882207SXin LI "#endif\n" 415cd882207SXin LI "\n" 416cd882207SXin LI "#endif\n"); 417cd882207SXin LI } 418cd882207SXin LI fprintf(out, 419cd882207SXin LI "\n" 420cd882207SXin LI "#endif\n"); 421cd882207SXin LI 422cd882207SXin LI /* write out zeros operator table to crc32.h */ 423cd882207SXin LI fprintf(out, 424cd882207SXin LI "\n" 425cd882207SXin LI "local const z_crc_t FAR x2n_table[] = {\n" 426cd882207SXin LI " "); 427cd882207SXin LI write_table(out, x2n_table, 32); 428cd882207SXin LI fprintf(out, 429cd882207SXin LI "};\n"); 430c9083b85SXin LI fclose(out); 431c9083b85SXin LI } 432c9083b85SXin LI #endif /* MAKECRCH */ 433c9083b85SXin LI } 434c9083b85SXin LI 435c9083b85SXin LI #ifdef MAKECRCH 436cd882207SXin LI 437cd882207SXin LI /* 438cd882207SXin LI Write the 32-bit values in table[0..k-1] to out, five per line in 439cd882207SXin LI hexadecimal separated by commas. 440cd882207SXin LI */ 441cd882207SXin LI local void write_table(out, table, k) 442c9083b85SXin LI FILE *out; 443c9083b85SXin LI const z_crc_t FAR *table; 444cd882207SXin LI int k; 445c9083b85SXin LI { 446c9083b85SXin LI int n; 447c9083b85SXin LI 448cd882207SXin LI for (n = 0; n < k; n++) 449cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 450c9083b85SXin LI (unsigned long)(table[n]), 451cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 452c9083b85SXin LI } 453cd882207SXin LI 454cd882207SXin LI /* 455cd882207SXin LI Write the high 32-bits of each value in table[0..k-1] to out, five per line 456cd882207SXin LI in hexadecimal separated by commas. 457cd882207SXin LI */ 458cd882207SXin LI local void write_table32hi(out, table, k) 459cd882207SXin LI FILE *out; 460cd882207SXin LI const z_word_t FAR *table; 461cd882207SXin LI int k; 462cd882207SXin LI { 463cd882207SXin LI int n; 464cd882207SXin LI 465cd882207SXin LI for (n = 0; n < k; n++) 466cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 467cd882207SXin LI (unsigned long)(table[n] >> 32), 468cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 469cd882207SXin LI } 470cd882207SXin LI 471cd882207SXin LI /* 472cd882207SXin LI Write the 64-bit values in table[0..k-1] to out, three per line in 473cd882207SXin LI hexadecimal separated by commas. This assumes that if there is a 64-bit 474cd882207SXin LI type, then there is also a long long integer type, and it is at least 64 475cd882207SXin LI bits. If not, then the type cast and format string can be adjusted 476cd882207SXin LI accordingly. 477cd882207SXin LI */ 478cd882207SXin LI local void write_table64(out, table, k) 479cd882207SXin LI FILE *out; 480cd882207SXin LI const z_word_t FAR *table; 481cd882207SXin LI int k; 482cd882207SXin LI { 483cd882207SXin LI int n; 484cd882207SXin LI 485cd882207SXin LI for (n = 0; n < k; n++) 486cd882207SXin LI fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 487cd882207SXin LI (unsigned long long)(table[n]), 488cd882207SXin LI n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 489cd882207SXin LI } 490cd882207SXin LI 491cd882207SXin LI /* Actually do the deed. */ 492cd882207SXin LI int main() 493cd882207SXin LI { 494cd882207SXin LI make_crc_table(); 495cd882207SXin LI return 0; 496cd882207SXin LI } 497cd882207SXin LI 498c9083b85SXin LI #endif /* MAKECRCH */ 499c9083b85SXin LI 500cd882207SXin LI #ifdef W 501cd882207SXin LI /* 502cd882207SXin LI Generate the little and big-endian braid tables for the given n and z_word_t 503cd882207SXin LI size w. Each array must have room for w blocks of 256 elements. 504cd882207SXin LI */ 505cd882207SXin LI local void braid(ltl, big, n, w) 506cd882207SXin LI z_crc_t ltl[][256]; 507cd882207SXin LI z_word_t big[][256]; 508cd882207SXin LI int n; 509cd882207SXin LI int w; 510cd882207SXin LI { 511cd882207SXin LI int k; 512cd882207SXin LI z_crc_t i, p, q; 513cd882207SXin LI for (k = 0; k < w; k++) { 514cd882207SXin LI p = x2nmodp((n * w + 3 - k) << 3, 0); 515cd882207SXin LI ltl[k][0] = 0; 516cd882207SXin LI big[w - 1 - k][0] = 0; 517cd882207SXin LI for (i = 1; i < 256; i++) { 518cd882207SXin LI ltl[k][i] = q = multmodp(i << 24, p); 519cd882207SXin LI big[w - 1 - k][i] = byte_swap(q); 520cd882207SXin LI } 521cd882207SXin LI } 522cd882207SXin LI } 523cd882207SXin LI #endif 524cd882207SXin LI 525c9083b85SXin LI #else /* !DYNAMIC_CRC_TABLE */ 526c9083b85SXin LI /* ======================================================================== 527cd882207SXin LI * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 528cd882207SXin LI * of x for combining CRC-32s, all made by make_crc_table(). 529c9083b85SXin LI */ 530c9083b85SXin LI #include "crc32.h" 531c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 532c9083b85SXin LI 533cd882207SXin LI /* ======================================================================== 534cd882207SXin LI * Routines used for CRC calculation. Some are also required for the table 535cd882207SXin LI * generation above. 536cd882207SXin LI */ 537cd882207SXin LI 538cd882207SXin LI /* 539cd882207SXin LI Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 540cd882207SXin LI reflected. For speed, this requires that a not be zero. 541cd882207SXin LI */ 542cd882207SXin LI local z_crc_t multmodp(a, b) 543cd882207SXin LI z_crc_t a; 544cd882207SXin LI z_crc_t b; 545cd882207SXin LI { 546cd882207SXin LI z_crc_t m, p; 547cd882207SXin LI 548cd882207SXin LI m = (z_crc_t)1 << 31; 549cd882207SXin LI p = 0; 550cd882207SXin LI for (;;) { 551cd882207SXin LI if (a & m) { 552cd882207SXin LI p ^= b; 553cd882207SXin LI if ((a & (m - 1)) == 0) 554cd882207SXin LI break; 555cd882207SXin LI } 556cd882207SXin LI m >>= 1; 557cd882207SXin LI b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 558cd882207SXin LI } 559cd882207SXin LI return p; 560cd882207SXin LI } 561cd882207SXin LI 562cd882207SXin LI /* 563cd882207SXin LI Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 564cd882207SXin LI initialized. 565cd882207SXin LI */ 566cd882207SXin LI local z_crc_t x2nmodp(n, k) 567cd882207SXin LI z_off64_t n; 568cd882207SXin LI unsigned k; 569cd882207SXin LI { 570cd882207SXin LI z_crc_t p; 571cd882207SXin LI 572cd882207SXin LI p = (z_crc_t)1 << 31; /* x^0 == 1 */ 573cd882207SXin LI while (n) { 574cd882207SXin LI if (n & 1) 575cd882207SXin LI p = multmodp(x2n_table[k & 31], p); 576cd882207SXin LI n >>= 1; 577cd882207SXin LI k++; 578cd882207SXin LI } 579cd882207SXin LI return p; 580cd882207SXin LI } 581cd882207SXin LI 582c9083b85SXin LI /* ========================================================================= 583cd882207SXin LI * This function can be used by asm versions of crc32(), and to force the 584cd882207SXin LI * generation of the CRC tables in a threaded application. 585c9083b85SXin LI */ 586c9083b85SXin LI const z_crc_t FAR * ZEXPORT get_crc_table() 587c9083b85SXin LI { 588c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 589cd882207SXin LI once(&made, make_crc_table); 590c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 591c9083b85SXin LI return (const z_crc_t FAR *)crc_table; 592c9083b85SXin LI } 593c9083b85SXin LI 594cd882207SXin LI /* ========================================================================= 595cd882207SXin LI * Use ARM machine instructions if available. This will compute the CRC about 596cd882207SXin LI * ten times faster than the braided calculation. This code does not check for 597cd882207SXin LI * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 598cd882207SXin LI * only be defined if the compilation specifies an ARM processor architecture 599cd882207SXin LI * that has the instructions. For example, compiling with -march=armv8.1-a or 600cd882207SXin LI * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 601cd882207SXin LI * instructions. 602cd882207SXin LI */ 603cd882207SXin LI #ifdef ARMCRC32 604cd882207SXin LI 605cd882207SXin LI /* 606cd882207SXin LI Constants empirically determined to maximize speed. These values are from 607cd882207SXin LI measurements on a Cortex-A57. Your mileage may vary. 608cd882207SXin LI */ 609cd882207SXin LI #define Z_BATCH 3990 /* number of words in a batch */ 610cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 611cd882207SXin LI #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 612cd882207SXin LI 613cd882207SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len) 614cd882207SXin LI unsigned long crc; 615cd882207SXin LI const unsigned char FAR *buf; 616cd882207SXin LI z_size_t len; 617cd882207SXin LI { 618cd882207SXin LI z_crc_t val; 619cd882207SXin LI z_word_t crc1, crc2; 620cd882207SXin LI const z_word_t *word; 621cd882207SXin LI z_word_t val0, val1, val2; 622cd882207SXin LI z_size_t last, last2, i; 623cd882207SXin LI z_size_t num; 624cd882207SXin LI 625cd882207SXin LI /* Return initial CRC, if requested. */ 626cd882207SXin LI if (buf == Z_NULL) return 0; 627cd882207SXin LI 628cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 629cd882207SXin LI once(&made, make_crc_table); 630cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 631cd882207SXin LI 632cd882207SXin LI /* Pre-condition the CRC */ 633*c61bc111SXin LI crc = (~crc) & 0xffffffff; 634cd882207SXin LI 635cd882207SXin LI /* Compute the CRC up to a word boundary. */ 636cd882207SXin LI while (len && ((z_size_t)buf & 7) != 0) { 637cd882207SXin LI len--; 638cd882207SXin LI val = *buf++; 639cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 640cd882207SXin LI } 641cd882207SXin LI 642cd882207SXin LI /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 643cd882207SXin LI word = (z_word_t const *)buf; 644cd882207SXin LI num = len >> 3; 645cd882207SXin LI len &= 7; 646cd882207SXin LI 647cd882207SXin LI /* Do three interleaved CRCs to realize the throughput of one crc32x 648cd882207SXin LI instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three 649cd882207SXin LI CRCs are combined into a single CRC after each set of batches. */ 650cd882207SXin LI while (num >= 3 * Z_BATCH) { 651cd882207SXin LI crc1 = 0; 652cd882207SXin LI crc2 = 0; 653cd882207SXin LI for (i = 0; i < Z_BATCH; i++) { 654cd882207SXin LI val0 = word[i]; 655cd882207SXin LI val1 = word[i + Z_BATCH]; 656cd882207SXin LI val2 = word[i + 2 * Z_BATCH]; 657cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 658cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 659cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 660cd882207SXin LI } 661cd882207SXin LI word += 3 * Z_BATCH; 662cd882207SXin LI num -= 3 * Z_BATCH; 663cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 664cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 665cd882207SXin LI } 666cd882207SXin LI 667cd882207SXin LI /* Do one last smaller batch with the remaining words, if there are enough 668cd882207SXin LI to pay for the combination of CRCs. */ 669cd882207SXin LI last = num / 3; 670cd882207SXin LI if (last >= Z_BATCH_MIN) { 671cd882207SXin LI last2 = last << 1; 672cd882207SXin LI crc1 = 0; 673cd882207SXin LI crc2 = 0; 674cd882207SXin LI for (i = 0; i < last; i++) { 675cd882207SXin LI val0 = word[i]; 676cd882207SXin LI val1 = word[i + last]; 677cd882207SXin LI val2 = word[i + last2]; 678cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 679cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 680cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 681cd882207SXin LI } 682cd882207SXin LI word += 3 * last; 683cd882207SXin LI num -= 3 * last; 684cd882207SXin LI val = x2nmodp(last, 6); 685cd882207SXin LI crc = multmodp(val, crc) ^ crc1; 686cd882207SXin LI crc = multmodp(val, crc) ^ crc2; 687cd882207SXin LI } 688cd882207SXin LI 689cd882207SXin LI /* Compute the CRC on any remaining words. */ 690cd882207SXin LI for (i = 0; i < num; i++) { 691cd882207SXin LI val0 = word[i]; 692cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 693cd882207SXin LI } 694cd882207SXin LI word += num; 695cd882207SXin LI 696cd882207SXin LI /* Complete the CRC on any remaining bytes. */ 697cd882207SXin LI buf = (const unsigned char FAR *)word; 698cd882207SXin LI while (len) { 699cd882207SXin LI len--; 700cd882207SXin LI val = *buf++; 701cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 702cd882207SXin LI } 703cd882207SXin LI 704cd882207SXin LI /* Return the CRC, post-conditioned. */ 705cd882207SXin LI return crc ^ 0xffffffff; 706cd882207SXin LI } 707cd882207SXin LI 708cd882207SXin LI #else 709cd882207SXin LI 710cd882207SXin LI #ifdef W 711cd882207SXin LI 712cd882207SXin LI /* 713cd882207SXin LI Return the CRC of the W bytes in the word_t data, taking the 714cd882207SXin LI least-significant byte of the word as the first byte of data, without any pre 715cd882207SXin LI or post conditioning. This is used to combine the CRCs of each braid. 716cd882207SXin LI */ 717cd882207SXin LI local z_crc_t crc_word(data) 718cd882207SXin LI z_word_t data; 719cd882207SXin LI { 720cd882207SXin LI int k; 721cd882207SXin LI for (k = 0; k < W; k++) 722cd882207SXin LI data = (data >> 8) ^ crc_table[data & 0xff]; 723cd882207SXin LI return (z_crc_t)data; 724cd882207SXin LI } 725cd882207SXin LI 726cd882207SXin LI local z_word_t crc_word_big(data) 727cd882207SXin LI z_word_t data; 728cd882207SXin LI { 729cd882207SXin LI int k; 730cd882207SXin LI for (k = 0; k < W; k++) 731cd882207SXin LI data = (data << 8) ^ 732cd882207SXin LI crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 733cd882207SXin LI return data; 734cd882207SXin LI } 735cd882207SXin LI 736cd882207SXin LI #endif 737c9083b85SXin LI 738c9083b85SXin LI /* ========================================================================= */ 739c9083b85SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len) 740c9083b85SXin LI unsigned long crc; 741c9083b85SXin LI const unsigned char FAR *buf; 742c9083b85SXin LI z_size_t len; 743c9083b85SXin LI { 744cd882207SXin LI /* Return initial CRC, if requested. */ 745cd882207SXin LI if (buf == Z_NULL) return 0; 746c9083b85SXin LI 747c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE 748cd882207SXin LI once(&made, make_crc_table); 749c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */ 750c9083b85SXin LI 751cd882207SXin LI /* Pre-condition the CRC */ 752*c61bc111SXin LI crc = (~crc) & 0xffffffff; 753c9083b85SXin LI 754cd882207SXin LI #ifdef W 755cd882207SXin LI 756cd882207SXin LI /* If provided enough bytes, do a braided CRC calculation. */ 757cd882207SXin LI if (len >= N * W + W - 1) { 758cd882207SXin LI z_size_t blks; 759cd882207SXin LI z_word_t const *words; 760cd882207SXin LI unsigned endian; 761cd882207SXin LI int k; 762cd882207SXin LI 763cd882207SXin LI /* Compute the CRC up to a z_word_t boundary. */ 764cd882207SXin LI while (len && ((z_size_t)buf & (W - 1)) != 0) { 765cd882207SXin LI len--; 766cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 767cd882207SXin LI } 768cd882207SXin LI 769cd882207SXin LI /* Compute the CRC on as many N z_word_t blocks as are available. */ 770cd882207SXin LI blks = len / (N * W); 771cd882207SXin LI len -= blks * N * W; 772cd882207SXin LI words = (z_word_t const *)buf; 773cd882207SXin LI 774cd882207SXin LI /* Do endian check at execution time instead of compile time, since ARM 775cd882207SXin LI processors can change the endianess at execution time. If the 776cd882207SXin LI compiler knows what the endianess will be, it can optimize out the 777cd882207SXin LI check and the unused branch. */ 778c9083b85SXin LI endian = 1; 779cd882207SXin LI if (*(unsigned char *)&endian) { 780cd882207SXin LI /* Little endian. */ 781cd882207SXin LI 782cd882207SXin LI z_crc_t crc0; 783cd882207SXin LI z_word_t word0; 784cd882207SXin LI #if N > 1 785cd882207SXin LI z_crc_t crc1; 786cd882207SXin LI z_word_t word1; 787cd882207SXin LI #if N > 2 788cd882207SXin LI z_crc_t crc2; 789cd882207SXin LI z_word_t word2; 790cd882207SXin LI #if N > 3 791cd882207SXin LI z_crc_t crc3; 792cd882207SXin LI z_word_t word3; 793cd882207SXin LI #if N > 4 794cd882207SXin LI z_crc_t crc4; 795cd882207SXin LI z_word_t word4; 796cd882207SXin LI #if N > 5 797cd882207SXin LI z_crc_t crc5; 798cd882207SXin LI z_word_t word5; 799cd882207SXin LI #endif 800cd882207SXin LI #endif 801cd882207SXin LI #endif 802cd882207SXin LI #endif 803cd882207SXin LI #endif 804cd882207SXin LI 805cd882207SXin LI /* Initialize the CRC for each braid. */ 806cd882207SXin LI crc0 = crc; 807cd882207SXin LI #if N > 1 808cd882207SXin LI crc1 = 0; 809cd882207SXin LI #if N > 2 810cd882207SXin LI crc2 = 0; 811cd882207SXin LI #if N > 3 812cd882207SXin LI crc3 = 0; 813cd882207SXin LI #if N > 4 814cd882207SXin LI crc4 = 0; 815cd882207SXin LI #if N > 5 816cd882207SXin LI crc5 = 0; 817cd882207SXin LI #endif 818cd882207SXin LI #endif 819cd882207SXin LI #endif 820cd882207SXin LI #endif 821cd882207SXin LI #endif 822cd882207SXin LI 823cd882207SXin LI /* 824cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid 825cd882207SXin LI independently. 826cd882207SXin LI */ 827cd882207SXin LI while (--blks) { 828cd882207SXin LI /* Load the word for each braid into registers. */ 829cd882207SXin LI word0 = crc0 ^ words[0]; 830cd882207SXin LI #if N > 1 831cd882207SXin LI word1 = crc1 ^ words[1]; 832cd882207SXin LI #if N > 2 833cd882207SXin LI word2 = crc2 ^ words[2]; 834cd882207SXin LI #if N > 3 835cd882207SXin LI word3 = crc3 ^ words[3]; 836cd882207SXin LI #if N > 4 837cd882207SXin LI word4 = crc4 ^ words[4]; 838cd882207SXin LI #if N > 5 839cd882207SXin LI word5 = crc5 ^ words[5]; 840cd882207SXin LI #endif 841cd882207SXin LI #endif 842cd882207SXin LI #endif 843cd882207SXin LI #endif 844cd882207SXin LI #endif 845cd882207SXin LI words += N; 846cd882207SXin LI 847cd882207SXin LI /* Compute and update the CRC for each word. The loop should 848cd882207SXin LI get unrolled. */ 849cd882207SXin LI crc0 = crc_braid_table[0][word0 & 0xff]; 850cd882207SXin LI #if N > 1 851cd882207SXin LI crc1 = crc_braid_table[0][word1 & 0xff]; 852cd882207SXin LI #if N > 2 853cd882207SXin LI crc2 = crc_braid_table[0][word2 & 0xff]; 854cd882207SXin LI #if N > 3 855cd882207SXin LI crc3 = crc_braid_table[0][word3 & 0xff]; 856cd882207SXin LI #if N > 4 857cd882207SXin LI crc4 = crc_braid_table[0][word4 & 0xff]; 858cd882207SXin LI #if N > 5 859cd882207SXin LI crc5 = crc_braid_table[0][word5 & 0xff]; 860cd882207SXin LI #endif 861cd882207SXin LI #endif 862cd882207SXin LI #endif 863cd882207SXin LI #endif 864cd882207SXin LI #endif 865cd882207SXin LI for (k = 1; k < W; k++) { 866cd882207SXin LI crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 867cd882207SXin LI #if N > 1 868cd882207SXin LI crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 869cd882207SXin LI #if N > 2 870cd882207SXin LI crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 871cd882207SXin LI #if N > 3 872cd882207SXin LI crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 873cd882207SXin LI #if N > 4 874cd882207SXin LI crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 875cd882207SXin LI #if N > 5 876cd882207SXin LI crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 877cd882207SXin LI #endif 878cd882207SXin LI #endif 879cd882207SXin LI #endif 880cd882207SXin LI #endif 881cd882207SXin LI #endif 882c9083b85SXin LI } 883cd882207SXin LI } 884cd882207SXin LI 885cd882207SXin LI /* 886cd882207SXin LI Process the last block, combining the CRCs of the N braids at the 887cd882207SXin LI same time. 888cd882207SXin LI */ 889cd882207SXin LI crc = crc_word(crc0 ^ words[0]); 890cd882207SXin LI #if N > 1 891cd882207SXin LI crc = crc_word(crc1 ^ words[1] ^ crc); 892cd882207SXin LI #if N > 2 893cd882207SXin LI crc = crc_word(crc2 ^ words[2] ^ crc); 894cd882207SXin LI #if N > 3 895cd882207SXin LI crc = crc_word(crc3 ^ words[3] ^ crc); 896cd882207SXin LI #if N > 4 897cd882207SXin LI crc = crc_word(crc4 ^ words[4] ^ crc); 898cd882207SXin LI #if N > 5 899cd882207SXin LI crc = crc_word(crc5 ^ words[5] ^ crc); 900cd882207SXin LI #endif 901cd882207SXin LI #endif 902cd882207SXin LI #endif 903cd882207SXin LI #endif 904cd882207SXin LI #endif 905cd882207SXin LI words += N; 906cd882207SXin LI } 907cd882207SXin LI else { 908cd882207SXin LI /* Big endian. */ 909cd882207SXin LI 910cd882207SXin LI z_word_t crc0, word0, comb; 911cd882207SXin LI #if N > 1 912cd882207SXin LI z_word_t crc1, word1; 913cd882207SXin LI #if N > 2 914cd882207SXin LI z_word_t crc2, word2; 915cd882207SXin LI #if N > 3 916cd882207SXin LI z_word_t crc3, word3; 917cd882207SXin LI #if N > 4 918cd882207SXin LI z_word_t crc4, word4; 919cd882207SXin LI #if N > 5 920cd882207SXin LI z_word_t crc5, word5; 921cd882207SXin LI #endif 922cd882207SXin LI #endif 923cd882207SXin LI #endif 924cd882207SXin LI #endif 925cd882207SXin LI #endif 926cd882207SXin LI 927cd882207SXin LI /* Initialize the CRC for each braid. */ 928cd882207SXin LI crc0 = byte_swap(crc); 929cd882207SXin LI #if N > 1 930cd882207SXin LI crc1 = 0; 931cd882207SXin LI #if N > 2 932cd882207SXin LI crc2 = 0; 933cd882207SXin LI #if N > 3 934cd882207SXin LI crc3 = 0; 935cd882207SXin LI #if N > 4 936cd882207SXin LI crc4 = 0; 937cd882207SXin LI #if N > 5 938cd882207SXin LI crc5 = 0; 939cd882207SXin LI #endif 940cd882207SXin LI #endif 941cd882207SXin LI #endif 942cd882207SXin LI #endif 943cd882207SXin LI #endif 944cd882207SXin LI 945cd882207SXin LI /* 946cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid 947cd882207SXin LI independently. 948cd882207SXin LI */ 949cd882207SXin LI while (--blks) { 950cd882207SXin LI /* Load the word for each braid into registers. */ 951cd882207SXin LI word0 = crc0 ^ words[0]; 952cd882207SXin LI #if N > 1 953cd882207SXin LI word1 = crc1 ^ words[1]; 954cd882207SXin LI #if N > 2 955cd882207SXin LI word2 = crc2 ^ words[2]; 956cd882207SXin LI #if N > 3 957cd882207SXin LI word3 = crc3 ^ words[3]; 958cd882207SXin LI #if N > 4 959cd882207SXin LI word4 = crc4 ^ words[4]; 960cd882207SXin LI #if N > 5 961cd882207SXin LI word5 = crc5 ^ words[5]; 962cd882207SXin LI #endif 963cd882207SXin LI #endif 964cd882207SXin LI #endif 965cd882207SXin LI #endif 966cd882207SXin LI #endif 967cd882207SXin LI words += N; 968cd882207SXin LI 969cd882207SXin LI /* Compute and update the CRC for each word. The loop should 970cd882207SXin LI get unrolled. */ 971cd882207SXin LI crc0 = crc_braid_big_table[0][word0 & 0xff]; 972cd882207SXin LI #if N > 1 973cd882207SXin LI crc1 = crc_braid_big_table[0][word1 & 0xff]; 974cd882207SXin LI #if N > 2 975cd882207SXin LI crc2 = crc_braid_big_table[0][word2 & 0xff]; 976cd882207SXin LI #if N > 3 977cd882207SXin LI crc3 = crc_braid_big_table[0][word3 & 0xff]; 978cd882207SXin LI #if N > 4 979cd882207SXin LI crc4 = crc_braid_big_table[0][word4 & 0xff]; 980cd882207SXin LI #if N > 5 981cd882207SXin LI crc5 = crc_braid_big_table[0][word5 & 0xff]; 982cd882207SXin LI #endif 983cd882207SXin LI #endif 984cd882207SXin LI #endif 985cd882207SXin LI #endif 986cd882207SXin LI #endif 987cd882207SXin LI for (k = 1; k < W; k++) { 988cd882207SXin LI crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 989cd882207SXin LI #if N > 1 990cd882207SXin LI crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 991cd882207SXin LI #if N > 2 992cd882207SXin LI crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 993cd882207SXin LI #if N > 3 994cd882207SXin LI crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 995cd882207SXin LI #if N > 4 996cd882207SXin LI crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 997cd882207SXin LI #if N > 5 998cd882207SXin LI crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 999cd882207SXin LI #endif 1000cd882207SXin LI #endif 1001cd882207SXin LI #endif 1002cd882207SXin LI #endif 1003cd882207SXin LI #endif 1004cd882207SXin LI } 1005cd882207SXin LI } 1006cd882207SXin LI 1007cd882207SXin LI /* 1008cd882207SXin LI Process the last block, combining the CRCs of the N braids at the 1009cd882207SXin LI same time. 1010cd882207SXin LI */ 1011cd882207SXin LI comb = crc_word_big(crc0 ^ words[0]); 1012cd882207SXin LI #if N > 1 1013cd882207SXin LI comb = crc_word_big(crc1 ^ words[1] ^ comb); 1014cd882207SXin LI #if N > 2 1015cd882207SXin LI comb = crc_word_big(crc2 ^ words[2] ^ comb); 1016cd882207SXin LI #if N > 3 1017cd882207SXin LI comb = crc_word_big(crc3 ^ words[3] ^ comb); 1018cd882207SXin LI #if N > 4 1019cd882207SXin LI comb = crc_word_big(crc4 ^ words[4] ^ comb); 1020cd882207SXin LI #if N > 5 1021cd882207SXin LI comb = crc_word_big(crc5 ^ words[5] ^ comb); 1022cd882207SXin LI #endif 1023cd882207SXin LI #endif 1024cd882207SXin LI #endif 1025cd882207SXin LI #endif 1026cd882207SXin LI #endif 1027cd882207SXin LI words += N; 1028cd882207SXin LI crc = byte_swap(comb); 1029cd882207SXin LI } 1030cd882207SXin LI 1031cd882207SXin LI /* 1032cd882207SXin LI Update the pointer to the remaining bytes to process. 1033cd882207SXin LI */ 1034cd882207SXin LI buf = (unsigned char const *)words; 1035cd882207SXin LI } 1036cd882207SXin LI 1037cd882207SXin LI #endif /* W */ 1038cd882207SXin LI 1039cd882207SXin LI /* Complete the computation of the CRC on any remaining bytes. */ 1040c9083b85SXin LI while (len >= 8) { 1041c9083b85SXin LI len -= 8; 1042cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1043cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1044cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1045cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1046cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1047cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1048cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1049cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1050c9083b85SXin LI } 1051cd882207SXin LI while (len) { 1052cd882207SXin LI len--; 1053cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1054c9083b85SXin LI } 1055c9083b85SXin LI 1056cd882207SXin LI /* Return the CRC, post-conditioned. */ 1057cd882207SXin LI return crc ^ 0xffffffff; 1058cd882207SXin LI } 1059cd882207SXin LI 1060cd882207SXin LI #endif 1061cd882207SXin LI 1062c9083b85SXin LI /* ========================================================================= */ 1063c9083b85SXin LI unsigned long ZEXPORT crc32(crc, buf, len) 1064c9083b85SXin LI unsigned long crc; 1065c9083b85SXin LI const unsigned char FAR *buf; 1066c9083b85SXin LI uInt len; 1067c9083b85SXin LI { 1068c9083b85SXin LI return crc32_z(crc, buf, len); 1069c9083b85SXin LI } 1070c9083b85SXin LI 1071c9083b85SXin LI /* ========================================================================= */ 1072cd882207SXin LI uLong ZEXPORT crc32_combine64(crc1, crc2, len2) 1073c9083b85SXin LI uLong crc1; 1074c9083b85SXin LI uLong crc2; 1075c9083b85SXin LI z_off64_t len2; 1076c9083b85SXin LI { 1077cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 1078cd882207SXin LI once(&made, make_crc_table); 1079cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 1080*c61bc111SXin LI return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1081c9083b85SXin LI } 1082c9083b85SXin LI 1083c9083b85SXin LI /* ========================================================================= */ 1084c9083b85SXin LI uLong ZEXPORT crc32_combine(crc1, crc2, len2) 1085c9083b85SXin LI uLong crc1; 1086c9083b85SXin LI uLong crc2; 1087c9083b85SXin LI z_off_t len2; 1088c9083b85SXin LI { 1089cd882207SXin LI return crc32_combine64(crc1, crc2, len2); 1090c9083b85SXin LI } 1091c9083b85SXin LI 1092cd882207SXin LI /* ========================================================================= */ 1093cd882207SXin LI uLong ZEXPORT crc32_combine_gen64(len2) 1094c9083b85SXin LI z_off64_t len2; 1095c9083b85SXin LI { 1096cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE 1097cd882207SXin LI once(&made, make_crc_table); 1098cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */ 1099cd882207SXin LI return x2nmodp(len2, 3); 1100cd882207SXin LI } 1101cd882207SXin LI 1102cd882207SXin LI /* ========================================================================= */ 1103cd882207SXin LI uLong ZEXPORT crc32_combine_gen(len2) 1104cd882207SXin LI z_off_t len2; 1105cd882207SXin LI { 1106cd882207SXin LI return crc32_combine_gen64(len2); 1107cd882207SXin LI } 1108cd882207SXin LI 1109cd882207SXin LI /* ========================================================================= */ 1110cd882207SXin LI uLong crc32_combine_op(crc1, crc2, op) 1111cd882207SXin LI uLong crc1; 1112cd882207SXin LI uLong crc2; 1113cd882207SXin LI uLong op; 1114cd882207SXin LI { 1115*c61bc111SXin LI return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1116c9083b85SXin LI } 1117