xref: /freebsd/sys/contrib/zlib/crc32.c (revision 4717628ed859513a3262ea68259d0605f39de0b3)
1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream
2cd882207SXin LI  * Copyright (C) 1995-2022 Mark Adler
3c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI  *
5cd882207SXin LI  * This interleaved implementation of a CRC makes use of pipelined multiple
6cd882207SXin LI  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7cd882207SXin LI  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8c9083b85SXin LI  */
9c9083b85SXin LI 
10c9083b85SXin LI /* @(#) $Id$ */
11c9083b85SXin LI 
12c9083b85SXin LI /*
13c9083b85SXin LI   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14c9083b85SXin LI   protection on the static variables used to control the first-use generation
15c9083b85SXin LI   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16c9083b85SXin LI   first call get_crc_table() to initialize the tables before allowing more than
17c9083b85SXin LI   one thread to use crc32().
18c9083b85SXin LI 
19cd882207SXin LI   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20cd882207SXin LI   produced, so that this one source file can be compiled to an executable.
21c9083b85SXin LI  */
22c9083b85SXin LI 
23c9083b85SXin LI #ifdef MAKECRCH
24c9083b85SXin LI #  include <stdio.h>
25c9083b85SXin LI #  ifndef DYNAMIC_CRC_TABLE
26c9083b85SXin LI #    define DYNAMIC_CRC_TABLE
27c9083b85SXin LI #  endif /* !DYNAMIC_CRC_TABLE */
28c9083b85SXin LI #endif /* MAKECRCH */
29c9083b85SXin LI 
30cd882207SXin LI #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31c9083b85SXin LI 
32cd882207SXin LI  /*
33cd882207SXin LI   A CRC of a message is computed on N braids of words in the message, where
34cd882207SXin LI   each word consists of W bytes (4 or 8). If N is 3, for example, then three
35cd882207SXin LI   running sparse CRCs are calculated respectively on each braid, at these
36cd882207SXin LI   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37cd882207SXin LI   This is done starting at a word boundary, and continues until as many blocks
38cd882207SXin LI   of N * W bytes as are available have been processed. The results are combined
39cd882207SXin LI   into a single CRC at the end. For this code, N must be in the range 1..6 and
40cd882207SXin LI   W must be 4 or 8. The upper limit on N can be increased if desired by adding
41cd882207SXin LI   more #if blocks, extending the patterns apparent in the code. In addition,
42cd882207SXin LI   crc32.h would need to be regenerated, if the maximum N value is increased.
43cd882207SXin LI 
44cd882207SXin LI   N and W are chosen empirically by benchmarking the execution time on a given
45cd882207SXin LI   processor. The choices for N and W below were based on testing on Intel Kaby
46cd882207SXin LI   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47cd882207SXin LI   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48cd882207SXin LI   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49cd882207SXin LI   They were all tested with either gcc or clang, all using the -O3 optimization
50cd882207SXin LI   level. Your mileage may vary.
51cd882207SXin LI  */
52cd882207SXin LI 
53cd882207SXin LI /* Define N */
54cd882207SXin LI #ifdef Z_TESTN
55cd882207SXin LI #  define N Z_TESTN
56c9083b85SXin LI #else
57cd882207SXin LI #  define N 5
58cd882207SXin LI #endif
59cd882207SXin LI #if N < 1 || N > 6
60cd882207SXin LI #  error N must be in 1..6
61cd882207SXin LI #endif
62c9083b85SXin LI 
63cd882207SXin LI /*
64cd882207SXin LI   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65cd882207SXin LI   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66cd882207SXin LI   that bytes are eight bits.
67cd882207SXin LI  */
68c9083b85SXin LI 
69cd882207SXin LI /*
70cd882207SXin LI   Define W and the associated z_word_t type. If W is not defined, then a
71cd882207SXin LI   braided calculation is not used, and the associated tables and code are not
72cd882207SXin LI   compiled.
73cd882207SXin LI  */
74cd882207SXin LI #ifdef Z_TESTW
75cd882207SXin LI #  if Z_TESTW-1 != -1
76cd882207SXin LI #    define W Z_TESTW
77cd882207SXin LI #  endif
78cd882207SXin LI #else
79cd882207SXin LI #  ifdef MAKECRCH
80cd882207SXin LI #    define W 8         /* required for MAKECRCH */
81cd882207SXin LI #  else
82cd882207SXin LI #    if defined(__x86_64__) || defined(__aarch64__)
83cd882207SXin LI #      define W 8
84cd882207SXin LI #    else
85cd882207SXin LI #      define W 4
86cd882207SXin LI #    endif
87cd882207SXin LI #  endif
88cd882207SXin LI #endif
89cd882207SXin LI #ifdef W
90cd882207SXin LI #  if W == 8 && defined(Z_U8)
91cd882207SXin LI      typedef Z_U8 z_word_t;
92cd882207SXin LI #  elif defined(Z_U4)
93cd882207SXin LI #    undef W
94cd882207SXin LI #    define W 4
95cd882207SXin LI      typedef Z_U4 z_word_t;
96cd882207SXin LI #  else
97cd882207SXin LI #    undef W
98cd882207SXin LI #  endif
99cd882207SXin LI #endif
100cd882207SXin LI 
101e37bb444SXin LI /* If available, use the ARM processor CRC32 instruction. */
102e37bb444SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103e37bb444SXin LI #  define ARMCRC32
104e37bb444SXin LI #endif
105e37bb444SXin LI 
106cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
107cd882207SXin LI /*
108cd882207SXin LI   Swap the bytes in a z_word_t to convert between little and big endian. Any
109cd882207SXin LI   self-respecting compiler will optimize this to a single machine byte-swap
110cd882207SXin LI   instruction, if one is available. This assumes that word_t is either 32 bits
111cd882207SXin LI   or 64 bits.
112cd882207SXin LI  */
byte_swap(z_word_t word)113*4717628eSXin LI local z_word_t byte_swap(z_word_t word) {
114cd882207SXin LI #  if W == 8
115cd882207SXin LI     return
116cd882207SXin LI         (word & 0xff00000000000000) >> 56 |
117cd882207SXin LI         (word & 0xff000000000000) >> 40 |
118cd882207SXin LI         (word & 0xff0000000000) >> 24 |
119cd882207SXin LI         (word & 0xff00000000) >> 8 |
120cd882207SXin LI         (word & 0xff000000) << 8 |
121cd882207SXin LI         (word & 0xff0000) << 24 |
122cd882207SXin LI         (word & 0xff00) << 40 |
123cd882207SXin LI         (word & 0xff) << 56;
124cd882207SXin LI #  else   /* W == 4 */
125cd882207SXin LI     return
126cd882207SXin LI         (word & 0xff000000) >> 24 |
127cd882207SXin LI         (word & 0xff0000) >> 8 |
128cd882207SXin LI         (word & 0xff00) << 8 |
129cd882207SXin LI         (word & 0xff) << 24;
130cd882207SXin LI #  endif
131cd882207SXin LI }
132cd882207SXin LI #endif
133cd882207SXin LI 
134*4717628eSXin LI #ifdef DYNAMIC_CRC_TABLE
135*4717628eSXin LI /* =========================================================================
136*4717628eSXin LI  * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
137*4717628eSXin LI  * below.
138*4717628eSXin LI  */
139*4717628eSXin LI    local z_crc_t FAR x2n_table[32];
140*4717628eSXin LI #else
141*4717628eSXin LI /* =========================================================================
142*4717628eSXin LI  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
143*4717628eSXin LI  * of x for combining CRC-32s, all made by make_crc_table().
144*4717628eSXin LI  */
145*4717628eSXin LI #  include "crc32.h"
146*4717628eSXin LI #endif
147*4717628eSXin LI 
148cd882207SXin LI /* CRC polynomial. */
149cd882207SXin LI #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
150c9083b85SXin LI 
151*4717628eSXin LI /*
152*4717628eSXin LI   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
153*4717628eSXin LI   reflected. For speed, this requires that a not be zero.
154*4717628eSXin LI  */
multmodp(z_crc_t a,z_crc_t b)155*4717628eSXin LI local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
156*4717628eSXin LI     z_crc_t m, p;
157c9083b85SXin LI 
158*4717628eSXin LI     m = (z_crc_t)1 << 31;
159*4717628eSXin LI     p = 0;
160*4717628eSXin LI     for (;;) {
161*4717628eSXin LI         if (a & m) {
162*4717628eSXin LI             p ^= b;
163*4717628eSXin LI             if ((a & (m - 1)) == 0)
164*4717628eSXin LI                 break;
165*4717628eSXin LI         }
166*4717628eSXin LI         m >>= 1;
167*4717628eSXin LI         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
168*4717628eSXin LI     }
169*4717628eSXin LI     return p;
170*4717628eSXin LI }
171*4717628eSXin LI 
172*4717628eSXin LI /*
173*4717628eSXin LI   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
174*4717628eSXin LI   initialized.
175*4717628eSXin LI  */
x2nmodp(z_off64_t n,unsigned k)176*4717628eSXin LI local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
177*4717628eSXin LI     z_crc_t p;
178*4717628eSXin LI 
179*4717628eSXin LI     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
180*4717628eSXin LI     while (n) {
181*4717628eSXin LI         if (n & 1)
182*4717628eSXin LI             p = multmodp(x2n_table[k & 31], p);
183*4717628eSXin LI         n >>= 1;
184*4717628eSXin LI         k++;
185*4717628eSXin LI     }
186*4717628eSXin LI     return p;
187*4717628eSXin LI }
188*4717628eSXin LI 
189*4717628eSXin LI #ifdef DYNAMIC_CRC_TABLE
190*4717628eSXin LI /* =========================================================================
191*4717628eSXin LI  * Build the tables for byte-wise and braided CRC-32 calculations, and a table
192*4717628eSXin LI  * of powers of x for combining CRC-32s.
193*4717628eSXin LI  */
194cd882207SXin LI local z_crc_t FAR crc_table[256];
195cd882207SXin LI #ifdef W
196cd882207SXin LI    local z_word_t FAR crc_big_table[256];
197cd882207SXin LI    local z_crc_t FAR crc_braid_table[W][256];
198cd882207SXin LI    local z_word_t FAR crc_braid_big_table[W][256];
199*4717628eSXin LI    local void braid(z_crc_t [][256], z_word_t [][256], int, int);
200cd882207SXin LI #endif
201c9083b85SXin LI #ifdef MAKECRCH
202*4717628eSXin LI    local void write_table(FILE *, const z_crc_t FAR *, int);
203*4717628eSXin LI    local void write_table32hi(FILE *, const z_word_t FAR *, int);
204*4717628eSXin LI    local void write_table64(FILE *, const z_word_t FAR *, int);
205c9083b85SXin LI #endif /* MAKECRCH */
206cd882207SXin LI 
207cd882207SXin LI /*
208cd882207SXin LI   Define a once() function depending on the availability of atomics. If this is
209cd882207SXin LI   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
210cd882207SXin LI   multiple threads, and if atomics are not available, then get_crc_table() must
211cd882207SXin LI   be called to initialize the tables and must return before any threads are
212cd882207SXin LI   allowed to compute or combine CRCs.
213cd882207SXin LI  */
214cd882207SXin LI 
215cd882207SXin LI /* Definition of once functionality. */
216cd882207SXin LI typedef struct once_s once_t;
217cd882207SXin LI 
218cd882207SXin LI /* Check for the availability of atomics. */
219cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
220cd882207SXin LI     !defined(__STDC_NO_ATOMICS__)
221cd882207SXin LI 
222cd882207SXin LI #include <stdatomic.h>
223cd882207SXin LI 
224cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
225cd882207SXin LI struct once_s {
226cd882207SXin LI     atomic_flag begun;
227cd882207SXin LI     atomic_int done;
228cd882207SXin LI };
229cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
230cd882207SXin LI 
231cd882207SXin LI /*
232cd882207SXin LI   Run the provided init() function exactly once, even if multiple threads
233cd882207SXin LI   invoke once() at the same time. The state must be a once_t initialized with
234cd882207SXin LI   ONCE_INIT.
235cd882207SXin LI  */
once(once_t * state,void (* init)(void))236*4717628eSXin LI local void once(once_t *state, void (*init)(void)) {
237cd882207SXin LI     if (!atomic_load(&state->done)) {
238cd882207SXin LI         if (atomic_flag_test_and_set(&state->begun))
239cd882207SXin LI             while (!atomic_load(&state->done))
240cd882207SXin LI                 ;
241cd882207SXin LI         else {
242cd882207SXin LI             init();
243cd882207SXin LI             atomic_store(&state->done, 1);
244cd882207SXin LI         }
245cd882207SXin LI     }
246cd882207SXin LI }
247cd882207SXin LI 
248cd882207SXin LI #else   /* no atomics */
249cd882207SXin LI 
250cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
251cd882207SXin LI struct once_s {
252cd882207SXin LI     volatile int begun;
253cd882207SXin LI     volatile int done;
254cd882207SXin LI };
255cd882207SXin LI #define ONCE_INIT {0, 0}
256cd882207SXin LI 
257cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of
258cd882207SXin LI    vulnerability. */
test_and_set(int volatile * flag)259*4717628eSXin LI local int test_and_set(int volatile *flag) {
260cd882207SXin LI     int was;
261cd882207SXin LI 
262cd882207SXin LI     was = *flag;
263cd882207SXin LI     *flag = 1;
264cd882207SXin LI     return was;
265cd882207SXin LI }
266cd882207SXin LI 
267cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */
once(once_t * state,void (* init)(void))268*4717628eSXin LI local void once(once_t *state, void (*init)(void)) {
269cd882207SXin LI     if (!state->done) {
270cd882207SXin LI         if (test_and_set(&state->begun))
271cd882207SXin LI             while (!state->done)
272cd882207SXin LI                 ;
273cd882207SXin LI         else {
274cd882207SXin LI             init();
275cd882207SXin LI             state->done = 1;
276cd882207SXin LI         }
277cd882207SXin LI     }
278cd882207SXin LI }
279cd882207SXin LI 
280cd882207SXin LI #endif
281cd882207SXin LI 
282cd882207SXin LI /* State for once(). */
283cd882207SXin LI local once_t made = ONCE_INIT;
284cd882207SXin LI 
285c9083b85SXin LI /*
286c9083b85SXin LI   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
287c9083b85SXin LI   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
288c9083b85SXin LI 
289c9083b85SXin LI   Polynomials over GF(2) are represented in binary, one bit per coefficient,
290c9083b85SXin LI   with the lowest powers in the most significant bit. Then adding polynomials
291c9083b85SXin LI   is just exclusive-or, and multiplying a polynomial by x is a right shift by
292c9083b85SXin LI   one. If we call the above polynomial p, and represent a byte as the
293c9083b85SXin LI   polynomial q, also with the lowest power in the most significant bit (so the
294cd882207SXin LI   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
295c9083b85SXin LI   where a mod b means the remainder after dividing a by b.
296c9083b85SXin LI 
297c9083b85SXin LI   This calculation is done using the shift-register method of multiplying and
298c9083b85SXin LI   taking the remainder. The register is initialized to zero, and for each
299c9083b85SXin LI   incoming bit, x^32 is added mod p to the register if the bit is a one (where
300cd882207SXin LI   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
301cd882207SXin LI   (which is shifting right by one and adding x^32 mod p if the bit shifted out
302cd882207SXin LI   is a one). We start with the highest power (least significant bit) of q and
303cd882207SXin LI   repeat for all eight bits of q.
304c9083b85SXin LI 
305cd882207SXin LI   The table is simply the CRC of all possible eight bit values. This is all the
306cd882207SXin LI   information needed to generate CRCs on data a byte at a time for all
307cd882207SXin LI   combinations of CRC register values and incoming bytes.
308c9083b85SXin LI  */
309cd882207SXin LI 
make_crc_table(void)310*4717628eSXin LI local void make_crc_table(void) {
311cd882207SXin LI     unsigned i, j, n;
312cd882207SXin LI     z_crc_t p;
313c9083b85SXin LI 
314cd882207SXin LI     /* initialize the CRC of bytes tables */
315cd882207SXin LI     for (i = 0; i < 256; i++) {
316cd882207SXin LI         p = i;
317cd882207SXin LI         for (j = 0; j < 8; j++)
318cd882207SXin LI             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
319cd882207SXin LI         crc_table[i] = p;
320cd882207SXin LI #ifdef W
321cd882207SXin LI         crc_big_table[i] = byte_swap(p);
322cd882207SXin LI #endif
323c9083b85SXin LI     }
324c9083b85SXin LI 
325cd882207SXin LI     /* initialize the x^2^n mod p(x) table */
326cd882207SXin LI     p = (z_crc_t)1 << 30;         /* x^1 */
327cd882207SXin LI     x2n_table[0] = p;
328cd882207SXin LI     for (n = 1; n < 32; n++)
329cd882207SXin LI         x2n_table[n] = p = multmodp(p, p);
330c9083b85SXin LI 
331cd882207SXin LI #ifdef W
332cd882207SXin LI     /* initialize the braiding tables -- needs x2n_table[] */
333cd882207SXin LI     braid(crc_braid_table, crc_braid_big_table, N, W);
334cd882207SXin LI #endif
335c9083b85SXin LI 
336c9083b85SXin LI #ifdef MAKECRCH
337c9083b85SXin LI     {
338cd882207SXin LI         /*
339cd882207SXin LI           The crc32.h header file contains tables for both 32-bit and 64-bit
340cd882207SXin LI           z_word_t's, and so requires a 64-bit type be available. In that case,
341cd882207SXin LI           z_word_t must be defined to be 64-bits. This code then also generates
342cd882207SXin LI           and writes out the tables for the case that z_word_t is 32 bits.
343cd882207SXin LI          */
344cd882207SXin LI #if !defined(W) || W != 8
345cd882207SXin LI #  error Need a 64-bit integer type in order to generate crc32.h.
346cd882207SXin LI #endif
347c9083b85SXin LI         FILE *out;
348cd882207SXin LI         int k, n;
349cd882207SXin LI         z_crc_t ltl[8][256];
350cd882207SXin LI         z_word_t big[8][256];
351c9083b85SXin LI 
352c9083b85SXin LI         out = fopen("crc32.h", "w");
353c9083b85SXin LI         if (out == NULL) return;
354cd882207SXin LI 
355cd882207SXin LI         /* write out little-endian CRC table to crc32.h */
356cd882207SXin LI         fprintf(out,
357cd882207SXin LI             "/* crc32.h -- tables for rapid CRC calculation\n"
358cd882207SXin LI             " * Generated automatically by crc32.c\n */\n"
359cd882207SXin LI             "\n"
360cd882207SXin LI             "local const z_crc_t FAR crc_table[] = {\n"
361cd882207SXin LI             "    ");
362cd882207SXin LI         write_table(out, crc_table, 256);
363cd882207SXin LI         fprintf(out,
364cd882207SXin LI             "};\n");
365cd882207SXin LI 
366cd882207SXin LI         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
367cd882207SXin LI         fprintf(out,
368cd882207SXin LI             "\n"
369cd882207SXin LI             "#ifdef W\n"
370cd882207SXin LI             "\n"
371cd882207SXin LI             "#if W == 8\n"
372cd882207SXin LI             "\n"
373cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
374cd882207SXin LI             "    ");
375cd882207SXin LI         write_table64(out, crc_big_table, 256);
376cd882207SXin LI         fprintf(out,
377cd882207SXin LI             "};\n");
378cd882207SXin LI 
379cd882207SXin LI         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
380cd882207SXin LI         fprintf(out,
381cd882207SXin LI             "\n"
382cd882207SXin LI             "#else /* W == 4 */\n"
383cd882207SXin LI             "\n"
384cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
385cd882207SXin LI             "    ");
386cd882207SXin LI         write_table32hi(out, crc_big_table, 256);
387cd882207SXin LI         fprintf(out,
388cd882207SXin LI             "};\n"
389cd882207SXin LI             "\n"
390cd882207SXin LI             "#endif\n");
391cd882207SXin LI 
392cd882207SXin LI         /* write out braid tables for each value of N */
393cd882207SXin LI         for (n = 1; n <= 6; n++) {
394cd882207SXin LI             fprintf(out,
395cd882207SXin LI             "\n"
396cd882207SXin LI             "#if N == %d\n", n);
397cd882207SXin LI 
398cd882207SXin LI             /* compute braid tables for this N and 64-bit word_t */
399cd882207SXin LI             braid(ltl, big, n, 8);
400cd882207SXin LI 
401cd882207SXin LI             /* write out braid tables for 64-bit z_word_t to crc32.h */
402cd882207SXin LI             fprintf(out,
403cd882207SXin LI             "\n"
404cd882207SXin LI             "#if W == 8\n"
405cd882207SXin LI             "\n"
406cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
407cd882207SXin LI             for (k = 0; k < 8; k++) {
408cd882207SXin LI                 fprintf(out, "   {");
409cd882207SXin LI                 write_table(out, ltl[k], 256);
410cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
411c9083b85SXin LI             }
412cd882207SXin LI             fprintf(out,
413cd882207SXin LI             "};\n"
414cd882207SXin LI             "\n"
415cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
416cd882207SXin LI             for (k = 0; k < 8; k++) {
417cd882207SXin LI                 fprintf(out, "   {");
418cd882207SXin LI                 write_table64(out, big[k], 256);
419cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
420cd882207SXin LI             }
421cd882207SXin LI             fprintf(out,
422cd882207SXin LI             "};\n");
423cd882207SXin LI 
424cd882207SXin LI             /* compute braid tables for this N and 32-bit word_t */
425cd882207SXin LI             braid(ltl, big, n, 4);
426cd882207SXin LI 
427cd882207SXin LI             /* write out braid tables for 32-bit z_word_t to crc32.h */
428cd882207SXin LI             fprintf(out,
429cd882207SXin LI             "\n"
430cd882207SXin LI             "#else /* W == 4 */\n"
431cd882207SXin LI             "\n"
432cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
433cd882207SXin LI             for (k = 0; k < 4; k++) {
434cd882207SXin LI                 fprintf(out, "   {");
435cd882207SXin LI                 write_table(out, ltl[k], 256);
436cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
437cd882207SXin LI             }
438cd882207SXin LI             fprintf(out,
439cd882207SXin LI             "};\n"
440cd882207SXin LI             "\n"
441cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
442cd882207SXin LI             for (k = 0; k < 4; k++) {
443cd882207SXin LI                 fprintf(out, "   {");
444cd882207SXin LI                 write_table32hi(out, big[k], 256);
445cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
446cd882207SXin LI             }
447cd882207SXin LI             fprintf(out,
448cd882207SXin LI             "};\n"
449cd882207SXin LI             "\n"
450cd882207SXin LI             "#endif\n"
451cd882207SXin LI             "\n"
452cd882207SXin LI             "#endif\n");
453cd882207SXin LI         }
454cd882207SXin LI         fprintf(out,
455cd882207SXin LI             "\n"
456cd882207SXin LI             "#endif\n");
457cd882207SXin LI 
458cd882207SXin LI         /* write out zeros operator table to crc32.h */
459cd882207SXin LI         fprintf(out,
460cd882207SXin LI             "\n"
461cd882207SXin LI             "local const z_crc_t FAR x2n_table[] = {\n"
462cd882207SXin LI             "    ");
463cd882207SXin LI         write_table(out, x2n_table, 32);
464cd882207SXin LI         fprintf(out,
465cd882207SXin LI             "};\n");
466c9083b85SXin LI         fclose(out);
467c9083b85SXin LI     }
468c9083b85SXin LI #endif /* MAKECRCH */
469c9083b85SXin LI }
470c9083b85SXin LI 
471c9083b85SXin LI #ifdef MAKECRCH
472cd882207SXin LI 
473cd882207SXin LI /*
474cd882207SXin LI    Write the 32-bit values in table[0..k-1] to out, five per line in
475cd882207SXin LI    hexadecimal separated by commas.
476cd882207SXin LI  */
write_table(FILE * out,const z_crc_t FAR * table,int k)477*4717628eSXin LI local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
478c9083b85SXin LI     int n;
479c9083b85SXin LI 
480cd882207SXin LI     for (n = 0; n < k; n++)
481cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
482c9083b85SXin LI                 (unsigned long)(table[n]),
483cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
484c9083b85SXin LI }
485cd882207SXin LI 
486cd882207SXin LI /*
487cd882207SXin LI    Write the high 32-bits of each value in table[0..k-1] to out, five per line
488cd882207SXin LI    in hexadecimal separated by commas.
489cd882207SXin LI  */
write_table32hi(FILE * out,const z_word_t FAR * table,int k)490*4717628eSXin LI local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
491cd882207SXin LI     int n;
492cd882207SXin LI 
493cd882207SXin LI     for (n = 0; n < k; n++)
494cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
495cd882207SXin LI                 (unsigned long)(table[n] >> 32),
496cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
497cd882207SXin LI }
498cd882207SXin LI 
499cd882207SXin LI /*
500cd882207SXin LI   Write the 64-bit values in table[0..k-1] to out, three per line in
501cd882207SXin LI   hexadecimal separated by commas. This assumes that if there is a 64-bit
502cd882207SXin LI   type, then there is also a long long integer type, and it is at least 64
503cd882207SXin LI   bits. If not, then the type cast and format string can be adjusted
504cd882207SXin LI   accordingly.
505cd882207SXin LI  */
write_table64(FILE * out,const z_word_t FAR * table,int k)506*4717628eSXin LI local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
507cd882207SXin LI     int n;
508cd882207SXin LI 
509cd882207SXin LI     for (n = 0; n < k; n++)
510cd882207SXin LI         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
511cd882207SXin LI                 (unsigned long long)(table[n]),
512cd882207SXin LI                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
513cd882207SXin LI }
514cd882207SXin LI 
515cd882207SXin LI /* Actually do the deed. */
main(void)516*4717628eSXin LI int main(void) {
517cd882207SXin LI     make_crc_table();
518cd882207SXin LI     return 0;
519cd882207SXin LI }
520cd882207SXin LI 
521c9083b85SXin LI #endif /* MAKECRCH */
522c9083b85SXin LI 
523cd882207SXin LI #ifdef W
524cd882207SXin LI /*
525cd882207SXin LI   Generate the little and big-endian braid tables for the given n and z_word_t
526cd882207SXin LI   size w. Each array must have room for w blocks of 256 elements.
527cd882207SXin LI  */
braid(z_crc_t ltl[][256],z_word_t big[][256],int n,int w)528*4717628eSXin LI local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
529cd882207SXin LI     int k;
530cd882207SXin LI     z_crc_t i, p, q;
531cd882207SXin LI     for (k = 0; k < w; k++) {
532cd882207SXin LI         p = x2nmodp((n * w + 3 - k) << 3, 0);
533cd882207SXin LI         ltl[k][0] = 0;
534cd882207SXin LI         big[w - 1 - k][0] = 0;
535cd882207SXin LI         for (i = 1; i < 256; i++) {
536cd882207SXin LI             ltl[k][i] = q = multmodp(i << 24, p);
537cd882207SXin LI             big[w - 1 - k][i] = byte_swap(q);
538cd882207SXin LI         }
539cd882207SXin LI     }
540cd882207SXin LI }
541cd882207SXin LI #endif
542cd882207SXin LI 
543c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
544c9083b85SXin LI 
545c9083b85SXin LI /* =========================================================================
546cd882207SXin LI  * This function can be used by asm versions of crc32(), and to force the
547cd882207SXin LI  * generation of the CRC tables in a threaded application.
548c9083b85SXin LI  */
get_crc_table(void)549*4717628eSXin LI const z_crc_t FAR * ZEXPORT get_crc_table(void) {
550c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
551cd882207SXin LI     once(&made, make_crc_table);
552c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
553c9083b85SXin LI     return (const z_crc_t FAR *)crc_table;
554c9083b85SXin LI }
555c9083b85SXin LI 
556cd882207SXin LI /* =========================================================================
557cd882207SXin LI  * Use ARM machine instructions if available. This will compute the CRC about
558cd882207SXin LI  * ten times faster than the braided calculation. This code does not check for
559cd882207SXin LI  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
560cd882207SXin LI  * only be defined if the compilation specifies an ARM processor architecture
561cd882207SXin LI  * that has the instructions. For example, compiling with -march=armv8.1-a or
562cd882207SXin LI  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
563cd882207SXin LI  * instructions.
564cd882207SXin LI  */
565cd882207SXin LI #ifdef ARMCRC32
566cd882207SXin LI 
567cd882207SXin LI /*
568cd882207SXin LI    Constants empirically determined to maximize speed. These values are from
569cd882207SXin LI    measurements on a Cortex-A57. Your mileage may vary.
570cd882207SXin LI  */
571cd882207SXin LI #define Z_BATCH 3990                /* number of words in a batch */
572cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
573cd882207SXin LI #define Z_BATCH_MIN 800             /* fewest words in a final batch */
574cd882207SXin LI 
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)575*4717628eSXin LI unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
576*4717628eSXin LI                               z_size_t len) {
577cd882207SXin LI     z_crc_t val;
578cd882207SXin LI     z_word_t crc1, crc2;
579cd882207SXin LI     const z_word_t *word;
580cd882207SXin LI     z_word_t val0, val1, val2;
581cd882207SXin LI     z_size_t last, last2, i;
582cd882207SXin LI     z_size_t num;
583cd882207SXin LI 
584cd882207SXin LI     /* Return initial CRC, if requested. */
585cd882207SXin LI     if (buf == Z_NULL) return 0;
586cd882207SXin LI 
587cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
588cd882207SXin LI     once(&made, make_crc_table);
589cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
590cd882207SXin LI 
591cd882207SXin LI     /* Pre-condition the CRC */
592c61bc111SXin LI     crc = (~crc) & 0xffffffff;
593cd882207SXin LI 
594cd882207SXin LI     /* Compute the CRC up to a word boundary. */
595cd882207SXin LI     while (len && ((z_size_t)buf & 7) != 0) {
596cd882207SXin LI         len--;
597cd882207SXin LI         val = *buf++;
598cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
599cd882207SXin LI     }
600cd882207SXin LI 
601cd882207SXin LI     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
602cd882207SXin LI     word = (z_word_t const *)buf;
603cd882207SXin LI     num = len >> 3;
604cd882207SXin LI     len &= 7;
605cd882207SXin LI 
606cd882207SXin LI     /* Do three interleaved CRCs to realize the throughput of one crc32x
607e37bb444SXin LI        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
608e37bb444SXin LI        three CRCs are combined into a single CRC after each set of batches. */
609cd882207SXin LI     while (num >= 3 * Z_BATCH) {
610cd882207SXin LI         crc1 = 0;
611cd882207SXin LI         crc2 = 0;
612cd882207SXin LI         for (i = 0; i < Z_BATCH; i++) {
613cd882207SXin LI             val0 = word[i];
614cd882207SXin LI             val1 = word[i + Z_BATCH];
615cd882207SXin LI             val2 = word[i + 2 * Z_BATCH];
616cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
617cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
618cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
619cd882207SXin LI         }
620cd882207SXin LI         word += 3 * Z_BATCH;
621cd882207SXin LI         num -= 3 * Z_BATCH;
622cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
623cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
624cd882207SXin LI     }
625cd882207SXin LI 
626cd882207SXin LI     /* Do one last smaller batch with the remaining words, if there are enough
627cd882207SXin LI        to pay for the combination of CRCs. */
628cd882207SXin LI     last = num / 3;
629cd882207SXin LI     if (last >= Z_BATCH_MIN) {
630cd882207SXin LI         last2 = last << 1;
631cd882207SXin LI         crc1 = 0;
632cd882207SXin LI         crc2 = 0;
633cd882207SXin LI         for (i = 0; i < last; i++) {
634cd882207SXin LI             val0 = word[i];
635cd882207SXin LI             val1 = word[i + last];
636cd882207SXin LI             val2 = word[i + last2];
637cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
638cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
639cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
640cd882207SXin LI         }
641cd882207SXin LI         word += 3 * last;
642cd882207SXin LI         num -= 3 * last;
643cd882207SXin LI         val = x2nmodp(last, 6);
644cd882207SXin LI         crc = multmodp(val, crc) ^ crc1;
645cd882207SXin LI         crc = multmodp(val, crc) ^ crc2;
646cd882207SXin LI     }
647cd882207SXin LI 
648cd882207SXin LI     /* Compute the CRC on any remaining words. */
649cd882207SXin LI     for (i = 0; i < num; i++) {
650cd882207SXin LI         val0 = word[i];
651cd882207SXin LI         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
652cd882207SXin LI     }
653cd882207SXin LI     word += num;
654cd882207SXin LI 
655cd882207SXin LI     /* Complete the CRC on any remaining bytes. */
656cd882207SXin LI     buf = (const unsigned char FAR *)word;
657cd882207SXin LI     while (len) {
658cd882207SXin LI         len--;
659cd882207SXin LI         val = *buf++;
660cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
661cd882207SXin LI     }
662cd882207SXin LI 
663cd882207SXin LI     /* Return the CRC, post-conditioned. */
664cd882207SXin LI     return crc ^ 0xffffffff;
665cd882207SXin LI }
666cd882207SXin LI 
667cd882207SXin LI #else
668cd882207SXin LI 
669cd882207SXin LI #ifdef W
670cd882207SXin LI 
671cd882207SXin LI /*
672cd882207SXin LI   Return the CRC of the W bytes in the word_t data, taking the
673cd882207SXin LI   least-significant byte of the word as the first byte of data, without any pre
674cd882207SXin LI   or post conditioning. This is used to combine the CRCs of each braid.
675cd882207SXin LI  */
crc_word(z_word_t data)676*4717628eSXin LI local z_crc_t crc_word(z_word_t data) {
677cd882207SXin LI     int k;
678cd882207SXin LI     for (k = 0; k < W; k++)
679cd882207SXin LI         data = (data >> 8) ^ crc_table[data & 0xff];
680cd882207SXin LI     return (z_crc_t)data;
681cd882207SXin LI }
682cd882207SXin LI 
crc_word_big(z_word_t data)683*4717628eSXin LI local z_word_t crc_word_big(z_word_t data) {
684cd882207SXin LI     int k;
685cd882207SXin LI     for (k = 0; k < W; k++)
686cd882207SXin LI         data = (data << 8) ^
687cd882207SXin LI             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
688cd882207SXin LI     return data;
689cd882207SXin LI }
690cd882207SXin LI 
691cd882207SXin LI #endif
692c9083b85SXin LI 
693c9083b85SXin LI /* ========================================================================= */
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)694*4717628eSXin LI unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
695*4717628eSXin LI                               z_size_t len) {
696cd882207SXin LI     /* Return initial CRC, if requested. */
697cd882207SXin LI     if (buf == Z_NULL) return 0;
698c9083b85SXin LI 
699c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
700cd882207SXin LI     once(&made, make_crc_table);
701c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
702c9083b85SXin LI 
703cd882207SXin LI     /* Pre-condition the CRC */
704c61bc111SXin LI     crc = (~crc) & 0xffffffff;
705c9083b85SXin LI 
706cd882207SXin LI #ifdef W
707cd882207SXin LI 
708cd882207SXin LI     /* If provided enough bytes, do a braided CRC calculation. */
709cd882207SXin LI     if (len >= N * W + W - 1) {
710cd882207SXin LI         z_size_t blks;
711cd882207SXin LI         z_word_t const *words;
712cd882207SXin LI         unsigned endian;
713cd882207SXin LI         int k;
714cd882207SXin LI 
715cd882207SXin LI         /* Compute the CRC up to a z_word_t boundary. */
716cd882207SXin LI         while (len && ((z_size_t)buf & (W - 1)) != 0) {
717cd882207SXin LI             len--;
718cd882207SXin LI             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
719cd882207SXin LI         }
720cd882207SXin LI 
721cd882207SXin LI         /* Compute the CRC on as many N z_word_t blocks as are available. */
722cd882207SXin LI         blks = len / (N * W);
723cd882207SXin LI         len -= blks * N * W;
724cd882207SXin LI         words = (z_word_t const *)buf;
725cd882207SXin LI 
726cd882207SXin LI         /* Do endian check at execution time instead of compile time, since ARM
727*4717628eSXin LI            processors can change the endianness at execution time. If the
728*4717628eSXin LI            compiler knows what the endianness will be, it can optimize out the
729cd882207SXin LI            check and the unused branch. */
730c9083b85SXin LI         endian = 1;
731cd882207SXin LI         if (*(unsigned char *)&endian) {
732cd882207SXin LI             /* Little endian. */
733cd882207SXin LI 
734cd882207SXin LI             z_crc_t crc0;
735cd882207SXin LI             z_word_t word0;
736cd882207SXin LI #if N > 1
737cd882207SXin LI             z_crc_t crc1;
738cd882207SXin LI             z_word_t word1;
739cd882207SXin LI #if N > 2
740cd882207SXin LI             z_crc_t crc2;
741cd882207SXin LI             z_word_t word2;
742cd882207SXin LI #if N > 3
743cd882207SXin LI             z_crc_t crc3;
744cd882207SXin LI             z_word_t word3;
745cd882207SXin LI #if N > 4
746cd882207SXin LI             z_crc_t crc4;
747cd882207SXin LI             z_word_t word4;
748cd882207SXin LI #if N > 5
749cd882207SXin LI             z_crc_t crc5;
750cd882207SXin LI             z_word_t word5;
751cd882207SXin LI #endif
752cd882207SXin LI #endif
753cd882207SXin LI #endif
754cd882207SXin LI #endif
755cd882207SXin LI #endif
756cd882207SXin LI 
757cd882207SXin LI             /* Initialize the CRC for each braid. */
758cd882207SXin LI             crc0 = crc;
759cd882207SXin LI #if N > 1
760cd882207SXin LI             crc1 = 0;
761cd882207SXin LI #if N > 2
762cd882207SXin LI             crc2 = 0;
763cd882207SXin LI #if N > 3
764cd882207SXin LI             crc3 = 0;
765cd882207SXin LI #if N > 4
766cd882207SXin LI             crc4 = 0;
767cd882207SXin LI #if N > 5
768cd882207SXin LI             crc5 = 0;
769cd882207SXin LI #endif
770cd882207SXin LI #endif
771cd882207SXin LI #endif
772cd882207SXin LI #endif
773cd882207SXin LI #endif
774cd882207SXin LI 
775cd882207SXin LI             /*
776cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
777cd882207SXin LI               independently.
778cd882207SXin LI              */
779cd882207SXin LI             while (--blks) {
780cd882207SXin LI                 /* Load the word for each braid into registers. */
781cd882207SXin LI                 word0 = crc0 ^ words[0];
782cd882207SXin LI #if N > 1
783cd882207SXin LI                 word1 = crc1 ^ words[1];
784cd882207SXin LI #if N > 2
785cd882207SXin LI                 word2 = crc2 ^ words[2];
786cd882207SXin LI #if N > 3
787cd882207SXin LI                 word3 = crc3 ^ words[3];
788cd882207SXin LI #if N > 4
789cd882207SXin LI                 word4 = crc4 ^ words[4];
790cd882207SXin LI #if N > 5
791cd882207SXin LI                 word5 = crc5 ^ words[5];
792cd882207SXin LI #endif
793cd882207SXin LI #endif
794cd882207SXin LI #endif
795cd882207SXin LI #endif
796cd882207SXin LI #endif
797cd882207SXin LI                 words += N;
798cd882207SXin LI 
799cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
800cd882207SXin LI                    get unrolled. */
801cd882207SXin LI                 crc0 = crc_braid_table[0][word0 & 0xff];
802cd882207SXin LI #if N > 1
803cd882207SXin LI                 crc1 = crc_braid_table[0][word1 & 0xff];
804cd882207SXin LI #if N > 2
805cd882207SXin LI                 crc2 = crc_braid_table[0][word2 & 0xff];
806cd882207SXin LI #if N > 3
807cd882207SXin LI                 crc3 = crc_braid_table[0][word3 & 0xff];
808cd882207SXin LI #if N > 4
809cd882207SXin LI                 crc4 = crc_braid_table[0][word4 & 0xff];
810cd882207SXin LI #if N > 5
811cd882207SXin LI                 crc5 = crc_braid_table[0][word5 & 0xff];
812cd882207SXin LI #endif
813cd882207SXin LI #endif
814cd882207SXin LI #endif
815cd882207SXin LI #endif
816cd882207SXin LI #endif
817cd882207SXin LI                 for (k = 1; k < W; k++) {
818cd882207SXin LI                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
819cd882207SXin LI #if N > 1
820cd882207SXin LI                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
821cd882207SXin LI #if N > 2
822cd882207SXin LI                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
823cd882207SXin LI #if N > 3
824cd882207SXin LI                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
825cd882207SXin LI #if N > 4
826cd882207SXin LI                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
827cd882207SXin LI #if N > 5
828cd882207SXin LI                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
829cd882207SXin LI #endif
830cd882207SXin LI #endif
831cd882207SXin LI #endif
832cd882207SXin LI #endif
833cd882207SXin LI #endif
834c9083b85SXin LI                 }
835cd882207SXin LI             }
836cd882207SXin LI 
837cd882207SXin LI             /*
838cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
839cd882207SXin LI               same time.
840cd882207SXin LI              */
841cd882207SXin LI             crc = crc_word(crc0 ^ words[0]);
842cd882207SXin LI #if N > 1
843cd882207SXin LI             crc = crc_word(crc1 ^ words[1] ^ crc);
844cd882207SXin LI #if N > 2
845cd882207SXin LI             crc = crc_word(crc2 ^ words[2] ^ crc);
846cd882207SXin LI #if N > 3
847cd882207SXin LI             crc = crc_word(crc3 ^ words[3] ^ crc);
848cd882207SXin LI #if N > 4
849cd882207SXin LI             crc = crc_word(crc4 ^ words[4] ^ crc);
850cd882207SXin LI #if N > 5
851cd882207SXin LI             crc = crc_word(crc5 ^ words[5] ^ crc);
852cd882207SXin LI #endif
853cd882207SXin LI #endif
854cd882207SXin LI #endif
855cd882207SXin LI #endif
856cd882207SXin LI #endif
857cd882207SXin LI             words += N;
858cd882207SXin LI         }
859cd882207SXin LI         else {
860cd882207SXin LI             /* Big endian. */
861cd882207SXin LI 
862cd882207SXin LI             z_word_t crc0, word0, comb;
863cd882207SXin LI #if N > 1
864cd882207SXin LI             z_word_t crc1, word1;
865cd882207SXin LI #if N > 2
866cd882207SXin LI             z_word_t crc2, word2;
867cd882207SXin LI #if N > 3
868cd882207SXin LI             z_word_t crc3, word3;
869cd882207SXin LI #if N > 4
870cd882207SXin LI             z_word_t crc4, word4;
871cd882207SXin LI #if N > 5
872cd882207SXin LI             z_word_t crc5, word5;
873cd882207SXin LI #endif
874cd882207SXin LI #endif
875cd882207SXin LI #endif
876cd882207SXin LI #endif
877cd882207SXin LI #endif
878cd882207SXin LI 
879cd882207SXin LI             /* Initialize the CRC for each braid. */
880cd882207SXin LI             crc0 = byte_swap(crc);
881cd882207SXin LI #if N > 1
882cd882207SXin LI             crc1 = 0;
883cd882207SXin LI #if N > 2
884cd882207SXin LI             crc2 = 0;
885cd882207SXin LI #if N > 3
886cd882207SXin LI             crc3 = 0;
887cd882207SXin LI #if N > 4
888cd882207SXin LI             crc4 = 0;
889cd882207SXin LI #if N > 5
890cd882207SXin LI             crc5 = 0;
891cd882207SXin LI #endif
892cd882207SXin LI #endif
893cd882207SXin LI #endif
894cd882207SXin LI #endif
895cd882207SXin LI #endif
896cd882207SXin LI 
897cd882207SXin LI             /*
898cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
899cd882207SXin LI               independently.
900cd882207SXin LI              */
901cd882207SXin LI             while (--blks) {
902cd882207SXin LI                 /* Load the word for each braid into registers. */
903cd882207SXin LI                 word0 = crc0 ^ words[0];
904cd882207SXin LI #if N > 1
905cd882207SXin LI                 word1 = crc1 ^ words[1];
906cd882207SXin LI #if N > 2
907cd882207SXin LI                 word2 = crc2 ^ words[2];
908cd882207SXin LI #if N > 3
909cd882207SXin LI                 word3 = crc3 ^ words[3];
910cd882207SXin LI #if N > 4
911cd882207SXin LI                 word4 = crc4 ^ words[4];
912cd882207SXin LI #if N > 5
913cd882207SXin LI                 word5 = crc5 ^ words[5];
914cd882207SXin LI #endif
915cd882207SXin LI #endif
916cd882207SXin LI #endif
917cd882207SXin LI #endif
918cd882207SXin LI #endif
919cd882207SXin LI                 words += N;
920cd882207SXin LI 
921cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
922cd882207SXin LI                    get unrolled. */
923cd882207SXin LI                 crc0 = crc_braid_big_table[0][word0 & 0xff];
924cd882207SXin LI #if N > 1
925cd882207SXin LI                 crc1 = crc_braid_big_table[0][word1 & 0xff];
926cd882207SXin LI #if N > 2
927cd882207SXin LI                 crc2 = crc_braid_big_table[0][word2 & 0xff];
928cd882207SXin LI #if N > 3
929cd882207SXin LI                 crc3 = crc_braid_big_table[0][word3 & 0xff];
930cd882207SXin LI #if N > 4
931cd882207SXin LI                 crc4 = crc_braid_big_table[0][word4 & 0xff];
932cd882207SXin LI #if N > 5
933cd882207SXin LI                 crc5 = crc_braid_big_table[0][word5 & 0xff];
934cd882207SXin LI #endif
935cd882207SXin LI #endif
936cd882207SXin LI #endif
937cd882207SXin LI #endif
938cd882207SXin LI #endif
939cd882207SXin LI                 for (k = 1; k < W; k++) {
940cd882207SXin LI                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
941cd882207SXin LI #if N > 1
942cd882207SXin LI                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
943cd882207SXin LI #if N > 2
944cd882207SXin LI                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
945cd882207SXin LI #if N > 3
946cd882207SXin LI                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
947cd882207SXin LI #if N > 4
948cd882207SXin LI                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
949cd882207SXin LI #if N > 5
950cd882207SXin LI                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
951cd882207SXin LI #endif
952cd882207SXin LI #endif
953cd882207SXin LI #endif
954cd882207SXin LI #endif
955cd882207SXin LI #endif
956cd882207SXin LI                 }
957cd882207SXin LI             }
958cd882207SXin LI 
959cd882207SXin LI             /*
960cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
961cd882207SXin LI               same time.
962cd882207SXin LI              */
963cd882207SXin LI             comb = crc_word_big(crc0 ^ words[0]);
964cd882207SXin LI #if N > 1
965cd882207SXin LI             comb = crc_word_big(crc1 ^ words[1] ^ comb);
966cd882207SXin LI #if N > 2
967cd882207SXin LI             comb = crc_word_big(crc2 ^ words[2] ^ comb);
968cd882207SXin LI #if N > 3
969cd882207SXin LI             comb = crc_word_big(crc3 ^ words[3] ^ comb);
970cd882207SXin LI #if N > 4
971cd882207SXin LI             comb = crc_word_big(crc4 ^ words[4] ^ comb);
972cd882207SXin LI #if N > 5
973cd882207SXin LI             comb = crc_word_big(crc5 ^ words[5] ^ comb);
974cd882207SXin LI #endif
975cd882207SXin LI #endif
976cd882207SXin LI #endif
977cd882207SXin LI #endif
978cd882207SXin LI #endif
979cd882207SXin LI             words += N;
980cd882207SXin LI             crc = byte_swap(comb);
981cd882207SXin LI         }
982cd882207SXin LI 
983cd882207SXin LI         /*
984cd882207SXin LI           Update the pointer to the remaining bytes to process.
985cd882207SXin LI          */
986cd882207SXin LI         buf = (unsigned char const *)words;
987cd882207SXin LI     }
988cd882207SXin LI 
989cd882207SXin LI #endif /* W */
990cd882207SXin LI 
991cd882207SXin LI     /* Complete the computation of the CRC on any remaining bytes. */
992c9083b85SXin LI     while (len >= 8) {
993c9083b85SXin LI         len -= 8;
994cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
995cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
996cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
997cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
998cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
999cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1000cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1001cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1002c9083b85SXin LI     }
1003cd882207SXin LI     while (len) {
1004cd882207SXin LI         len--;
1005cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1006c9083b85SXin LI     }
1007c9083b85SXin LI 
1008cd882207SXin LI     /* Return the CRC, post-conditioned. */
1009cd882207SXin LI     return crc ^ 0xffffffff;
1010cd882207SXin LI }
1011cd882207SXin LI 
1012cd882207SXin LI #endif
1013cd882207SXin LI 
1014c9083b85SXin LI /* ========================================================================= */
crc32(unsigned long crc,const unsigned char FAR * buf,uInt len)1015*4717628eSXin LI unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1016*4717628eSXin LI                             uInt len) {
1017c9083b85SXin LI     return crc32_z(crc, buf, len);
1018c9083b85SXin LI }
1019c9083b85SXin LI 
1020c9083b85SXin LI /* ========================================================================= */
crc32_combine64(uLong crc1,uLong crc2,z_off64_t len2)1021*4717628eSXin LI uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
1022cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1023cd882207SXin LI     once(&made, make_crc_table);
1024cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1025c61bc111SXin LI     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1026c9083b85SXin LI }
1027c9083b85SXin LI 
1028c9083b85SXin LI /* ========================================================================= */
crc32_combine(uLong crc1,uLong crc2,z_off_t len2)1029*4717628eSXin LI uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
1030e37bb444SXin LI     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1031c9083b85SXin LI }
1032c9083b85SXin LI 
1033cd882207SXin LI /* ========================================================================= */
crc32_combine_gen64(z_off64_t len2)1034*4717628eSXin LI uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
1035cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1036cd882207SXin LI     once(&made, make_crc_table);
1037cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1038cd882207SXin LI     return x2nmodp(len2, 3);
1039cd882207SXin LI }
1040cd882207SXin LI 
1041cd882207SXin LI /* ========================================================================= */
crc32_combine_gen(z_off_t len2)1042*4717628eSXin LI uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
1043e37bb444SXin LI     return crc32_combine_gen64((z_off64_t)len2);
1044cd882207SXin LI }
1045cd882207SXin LI 
1046cd882207SXin LI /* ========================================================================= */
crc32_combine_op(uLong crc1,uLong crc2,uLong op)1047*4717628eSXin LI uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
1048c61bc111SXin LI     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1049c9083b85SXin LI }
1050