xref: /freebsd/sys/contrib/zlib/crc32.c (revision e37bb444aa945ed0725766e986698a09bd61b1b2)
1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream
2cd882207SXin LI  * Copyright (C) 1995-2022 Mark Adler
3c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI  *
5cd882207SXin LI  * This interleaved implementation of a CRC makes use of pipelined multiple
6cd882207SXin LI  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7cd882207SXin LI  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8c9083b85SXin LI  */
9c9083b85SXin LI 
10c9083b85SXin LI /* @(#) $Id$ */
11c9083b85SXin LI 
12c9083b85SXin LI /*
13c9083b85SXin LI   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14c9083b85SXin LI   protection on the static variables used to control the first-use generation
15c9083b85SXin LI   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16c9083b85SXin LI   first call get_crc_table() to initialize the tables before allowing more than
17c9083b85SXin LI   one thread to use crc32().
18c9083b85SXin LI 
19cd882207SXin LI   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20cd882207SXin LI   produced, so that this one source file can be compiled to an executable.
21c9083b85SXin LI  */
22c9083b85SXin LI 
23c9083b85SXin LI #ifdef MAKECRCH
24c9083b85SXin LI #  include <stdio.h>
25c9083b85SXin LI #  ifndef DYNAMIC_CRC_TABLE
26c9083b85SXin LI #    define DYNAMIC_CRC_TABLE
27c9083b85SXin LI #  endif /* !DYNAMIC_CRC_TABLE */
28c9083b85SXin LI #endif /* MAKECRCH */
29c9083b85SXin LI 
30cd882207SXin LI #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31c9083b85SXin LI 
32cd882207SXin LI  /*
33cd882207SXin LI   A CRC of a message is computed on N braids of words in the message, where
34cd882207SXin LI   each word consists of W bytes (4 or 8). If N is 3, for example, then three
35cd882207SXin LI   running sparse CRCs are calculated respectively on each braid, at these
36cd882207SXin LI   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37cd882207SXin LI   This is done starting at a word boundary, and continues until as many blocks
38cd882207SXin LI   of N * W bytes as are available have been processed. The results are combined
39cd882207SXin LI   into a single CRC at the end. For this code, N must be in the range 1..6 and
40cd882207SXin LI   W must be 4 or 8. The upper limit on N can be increased if desired by adding
41cd882207SXin LI   more #if blocks, extending the patterns apparent in the code. In addition,
42cd882207SXin LI   crc32.h would need to be regenerated, if the maximum N value is increased.
43cd882207SXin LI 
44cd882207SXin LI   N and W are chosen empirically by benchmarking the execution time on a given
45cd882207SXin LI   processor. The choices for N and W below were based on testing on Intel Kaby
46cd882207SXin LI   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47cd882207SXin LI   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48cd882207SXin LI   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49cd882207SXin LI   They were all tested with either gcc or clang, all using the -O3 optimization
50cd882207SXin LI   level. Your mileage may vary.
51cd882207SXin LI  */
52cd882207SXin LI 
53cd882207SXin LI /* Define N */
54cd882207SXin LI #ifdef Z_TESTN
55cd882207SXin LI #  define N Z_TESTN
56c9083b85SXin LI #else
57cd882207SXin LI #  define N 5
58cd882207SXin LI #endif
59cd882207SXin LI #if N < 1 || N > 6
60cd882207SXin LI #  error N must be in 1..6
61cd882207SXin LI #endif
62c9083b85SXin LI 
63cd882207SXin LI /*
64cd882207SXin LI   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65cd882207SXin LI   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66cd882207SXin LI   that bytes are eight bits.
67cd882207SXin LI  */
68c9083b85SXin LI 
69cd882207SXin LI /*
70cd882207SXin LI   Define W and the associated z_word_t type. If W is not defined, then a
71cd882207SXin LI   braided calculation is not used, and the associated tables and code are not
72cd882207SXin LI   compiled.
73cd882207SXin LI  */
74cd882207SXin LI #ifdef Z_TESTW
75cd882207SXin LI #  if Z_TESTW-1 != -1
76cd882207SXin LI #    define W Z_TESTW
77cd882207SXin LI #  endif
78cd882207SXin LI #else
79cd882207SXin LI #  ifdef MAKECRCH
80cd882207SXin LI #    define W 8         /* required for MAKECRCH */
81cd882207SXin LI #  else
82cd882207SXin LI #    if defined(__x86_64__) || defined(__aarch64__)
83cd882207SXin LI #      define W 8
84cd882207SXin LI #    else
85cd882207SXin LI #      define W 4
86cd882207SXin LI #    endif
87cd882207SXin LI #  endif
88cd882207SXin LI #endif
89cd882207SXin LI #ifdef W
90cd882207SXin LI #  if W == 8 && defined(Z_U8)
91cd882207SXin LI      typedef Z_U8 z_word_t;
92cd882207SXin LI #  elif defined(Z_U4)
93cd882207SXin LI #    undef W
94cd882207SXin LI #    define W 4
95cd882207SXin LI      typedef Z_U4 z_word_t;
96cd882207SXin LI #  else
97cd882207SXin LI #    undef W
98cd882207SXin LI #  endif
99cd882207SXin LI #endif
100cd882207SXin LI 
101*e37bb444SXin LI /* If available, use the ARM processor CRC32 instruction. */
102*e37bb444SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103*e37bb444SXin LI #  define ARMCRC32
104*e37bb444SXin LI #endif
105*e37bb444SXin LI 
106cd882207SXin LI /* Local functions. */
107cd882207SXin LI local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
108cd882207SXin LI local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
109cd882207SXin LI 
110*e37bb444SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111*e37bb444SXin LI     local z_word_t byte_swap OF((z_word_t word));
112*e37bb444SXin LI #endif
113*e37bb444SXin LI 
114*e37bb444SXin LI #if defined(W) && !defined(ARMCRC32)
115*e37bb444SXin LI     local z_crc_t crc_word OF((z_word_t data));
116*e37bb444SXin LI     local z_word_t crc_word_big OF((z_word_t data));
117cd882207SXin LI #endif
118cd882207SXin LI 
119cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120cd882207SXin LI /*
121cd882207SXin LI   Swap the bytes in a z_word_t to convert between little and big endian. Any
122cd882207SXin LI   self-respecting compiler will optimize this to a single machine byte-swap
123cd882207SXin LI   instruction, if one is available. This assumes that word_t is either 32 bits
124cd882207SXin LI   or 64 bits.
125cd882207SXin LI  */
1267550e242SJohn Baldwin local z_word_t byte_swap OF((z_word_t word));
1277550e242SJohn Baldwin 
128cd882207SXin LI local z_word_t byte_swap(word)
129cd882207SXin LI     z_word_t word;
130cd882207SXin LI {
131cd882207SXin LI #  if W == 8
132cd882207SXin LI     return
133cd882207SXin LI         (word & 0xff00000000000000) >> 56 |
134cd882207SXin LI         (word & 0xff000000000000) >> 40 |
135cd882207SXin LI         (word & 0xff0000000000) >> 24 |
136cd882207SXin LI         (word & 0xff00000000) >> 8 |
137cd882207SXin LI         (word & 0xff000000) << 8 |
138cd882207SXin LI         (word & 0xff0000) << 24 |
139cd882207SXin LI         (word & 0xff00) << 40 |
140cd882207SXin LI         (word & 0xff) << 56;
141cd882207SXin LI #  else   /* W == 4 */
142cd882207SXin LI     return
143cd882207SXin LI         (word & 0xff000000) >> 24 |
144cd882207SXin LI         (word & 0xff0000) >> 8 |
145cd882207SXin LI         (word & 0xff00) << 8 |
146cd882207SXin LI         (word & 0xff) << 24;
147cd882207SXin LI #  endif
148cd882207SXin LI }
149cd882207SXin LI #endif
150cd882207SXin LI 
151cd882207SXin LI /* CRC polynomial. */
152cd882207SXin LI #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
153c9083b85SXin LI 
154c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
155c9083b85SXin LI 
156cd882207SXin LI local z_crc_t FAR crc_table[256];
157cd882207SXin LI local z_crc_t FAR x2n_table[32];
158c9083b85SXin LI local void make_crc_table OF((void));
159cd882207SXin LI #ifdef W
160cd882207SXin LI    local z_word_t FAR crc_big_table[256];
161cd882207SXin LI    local z_crc_t FAR crc_braid_table[W][256];
162cd882207SXin LI    local z_word_t FAR crc_braid_big_table[W][256];
163cd882207SXin LI    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
164cd882207SXin LI #endif
165c9083b85SXin LI #ifdef MAKECRCH
166cd882207SXin LI    local void write_table OF((FILE *, const z_crc_t FAR *, int));
167cd882207SXin LI    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
168cd882207SXin LI    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
169c9083b85SXin LI #endif /* MAKECRCH */
170cd882207SXin LI 
171cd882207SXin LI /*
172cd882207SXin LI   Define a once() function depending on the availability of atomics. If this is
173cd882207SXin LI   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
174cd882207SXin LI   multiple threads, and if atomics are not available, then get_crc_table() must
175cd882207SXin LI   be called to initialize the tables and must return before any threads are
176cd882207SXin LI   allowed to compute or combine CRCs.
177cd882207SXin LI  */
178cd882207SXin LI 
179cd882207SXin LI /* Definition of once functionality. */
180cd882207SXin LI typedef struct once_s once_t;
181cd882207SXin LI local void once OF((once_t *, void (*)(void)));
182cd882207SXin LI 
183cd882207SXin LI /* Check for the availability of atomics. */
184cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
185cd882207SXin LI     !defined(__STDC_NO_ATOMICS__)
186cd882207SXin LI 
187cd882207SXin LI #include <stdatomic.h>
188cd882207SXin LI 
189cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
190cd882207SXin LI struct once_s {
191cd882207SXin LI     atomic_flag begun;
192cd882207SXin LI     atomic_int done;
193cd882207SXin LI };
194cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
195cd882207SXin LI 
196cd882207SXin LI /*
197cd882207SXin LI   Run the provided init() function exactly once, even if multiple threads
198cd882207SXin LI   invoke once() at the same time. The state must be a once_t initialized with
199cd882207SXin LI   ONCE_INIT.
200cd882207SXin LI  */
201cd882207SXin LI local void once(state, init)
202cd882207SXin LI     once_t *state;
203cd882207SXin LI     void (*init)(void);
204cd882207SXin LI {
205cd882207SXin LI     if (!atomic_load(&state->done)) {
206cd882207SXin LI         if (atomic_flag_test_and_set(&state->begun))
207cd882207SXin LI             while (!atomic_load(&state->done))
208cd882207SXin LI                 ;
209cd882207SXin LI         else {
210cd882207SXin LI             init();
211cd882207SXin LI             atomic_store(&state->done, 1);
212cd882207SXin LI         }
213cd882207SXin LI     }
214cd882207SXin LI }
215cd882207SXin LI 
216cd882207SXin LI #else   /* no atomics */
217cd882207SXin LI 
218cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
219cd882207SXin LI struct once_s {
220cd882207SXin LI     volatile int begun;
221cd882207SXin LI     volatile int done;
222cd882207SXin LI };
223cd882207SXin LI #define ONCE_INIT {0, 0}
224cd882207SXin LI 
225cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of
226cd882207SXin LI    vulnerability. */
227cd882207SXin LI local int test_and_set OF((int volatile *));
228cd882207SXin LI local int test_and_set(flag)
229cd882207SXin LI     int volatile *flag;
230cd882207SXin LI {
231cd882207SXin LI     int was;
232cd882207SXin LI 
233cd882207SXin LI     was = *flag;
234cd882207SXin LI     *flag = 1;
235cd882207SXin LI     return was;
236cd882207SXin LI }
237cd882207SXin LI 
238cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */
239cd882207SXin LI local void once(state, init)
240cd882207SXin LI     once_t *state;
241cd882207SXin LI     void (*init)(void);
242cd882207SXin LI {
243cd882207SXin LI     if (!state->done) {
244cd882207SXin LI         if (test_and_set(&state->begun))
245cd882207SXin LI             while (!state->done)
246cd882207SXin LI                 ;
247cd882207SXin LI         else {
248cd882207SXin LI             init();
249cd882207SXin LI             state->done = 1;
250cd882207SXin LI         }
251cd882207SXin LI     }
252cd882207SXin LI }
253cd882207SXin LI 
254cd882207SXin LI #endif
255cd882207SXin LI 
256cd882207SXin LI /* State for once(). */
257cd882207SXin LI local once_t made = ONCE_INIT;
258cd882207SXin LI 
259c9083b85SXin LI /*
260c9083b85SXin LI   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
261c9083b85SXin LI   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
262c9083b85SXin LI 
263c9083b85SXin LI   Polynomials over GF(2) are represented in binary, one bit per coefficient,
264c9083b85SXin LI   with the lowest powers in the most significant bit. Then adding polynomials
265c9083b85SXin LI   is just exclusive-or, and multiplying a polynomial by x is a right shift by
266c9083b85SXin LI   one. If we call the above polynomial p, and represent a byte as the
267c9083b85SXin LI   polynomial q, also with the lowest power in the most significant bit (so the
268cd882207SXin LI   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
269c9083b85SXin LI   where a mod b means the remainder after dividing a by b.
270c9083b85SXin LI 
271c9083b85SXin LI   This calculation is done using the shift-register method of multiplying and
272c9083b85SXin LI   taking the remainder. The register is initialized to zero, and for each
273c9083b85SXin LI   incoming bit, x^32 is added mod p to the register if the bit is a one (where
274cd882207SXin LI   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
275cd882207SXin LI   (which is shifting right by one and adding x^32 mod p if the bit shifted out
276cd882207SXin LI   is a one). We start with the highest power (least significant bit) of q and
277cd882207SXin LI   repeat for all eight bits of q.
278c9083b85SXin LI 
279cd882207SXin LI   The table is simply the CRC of all possible eight bit values. This is all the
280cd882207SXin LI   information needed to generate CRCs on data a byte at a time for all
281cd882207SXin LI   combinations of CRC register values and incoming bytes.
282c9083b85SXin LI  */
283cd882207SXin LI 
284c9083b85SXin LI local void make_crc_table()
285c9083b85SXin LI {
286cd882207SXin LI     unsigned i, j, n;
287cd882207SXin LI     z_crc_t p;
288c9083b85SXin LI 
289cd882207SXin LI     /* initialize the CRC of bytes tables */
290cd882207SXin LI     for (i = 0; i < 256; i++) {
291cd882207SXin LI         p = i;
292cd882207SXin LI         for (j = 0; j < 8; j++)
293cd882207SXin LI             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
294cd882207SXin LI         crc_table[i] = p;
295cd882207SXin LI #ifdef W
296cd882207SXin LI         crc_big_table[i] = byte_swap(p);
297cd882207SXin LI #endif
298c9083b85SXin LI     }
299c9083b85SXin LI 
300cd882207SXin LI     /* initialize the x^2^n mod p(x) table */
301cd882207SXin LI     p = (z_crc_t)1 << 30;         /* x^1 */
302cd882207SXin LI     x2n_table[0] = p;
303cd882207SXin LI     for (n = 1; n < 32; n++)
304cd882207SXin LI         x2n_table[n] = p = multmodp(p, p);
305c9083b85SXin LI 
306cd882207SXin LI #ifdef W
307cd882207SXin LI     /* initialize the braiding tables -- needs x2n_table[] */
308cd882207SXin LI     braid(crc_braid_table, crc_braid_big_table, N, W);
309cd882207SXin LI #endif
310c9083b85SXin LI 
311c9083b85SXin LI #ifdef MAKECRCH
312c9083b85SXin LI     {
313cd882207SXin LI         /*
314cd882207SXin LI           The crc32.h header file contains tables for both 32-bit and 64-bit
315cd882207SXin LI           z_word_t's, and so requires a 64-bit type be available. In that case,
316cd882207SXin LI           z_word_t must be defined to be 64-bits. This code then also generates
317cd882207SXin LI           and writes out the tables for the case that z_word_t is 32 bits.
318cd882207SXin LI          */
319cd882207SXin LI #if !defined(W) || W != 8
320cd882207SXin LI #  error Need a 64-bit integer type in order to generate crc32.h.
321cd882207SXin LI #endif
322c9083b85SXin LI         FILE *out;
323cd882207SXin LI         int k, n;
324cd882207SXin LI         z_crc_t ltl[8][256];
325cd882207SXin LI         z_word_t big[8][256];
326c9083b85SXin LI 
327c9083b85SXin LI         out = fopen("crc32.h", "w");
328c9083b85SXin LI         if (out == NULL) return;
329cd882207SXin LI 
330cd882207SXin LI         /* write out little-endian CRC table to crc32.h */
331cd882207SXin LI         fprintf(out,
332cd882207SXin LI             "/* crc32.h -- tables for rapid CRC calculation\n"
333cd882207SXin LI             " * Generated automatically by crc32.c\n */\n"
334cd882207SXin LI             "\n"
335cd882207SXin LI             "local const z_crc_t FAR crc_table[] = {\n"
336cd882207SXin LI             "    ");
337cd882207SXin LI         write_table(out, crc_table, 256);
338cd882207SXin LI         fprintf(out,
339cd882207SXin LI             "};\n");
340cd882207SXin LI 
341cd882207SXin LI         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
342cd882207SXin LI         fprintf(out,
343cd882207SXin LI             "\n"
344cd882207SXin LI             "#ifdef W\n"
345cd882207SXin LI             "\n"
346cd882207SXin LI             "#if W == 8\n"
347cd882207SXin LI             "\n"
348cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
349cd882207SXin LI             "    ");
350cd882207SXin LI         write_table64(out, crc_big_table, 256);
351cd882207SXin LI         fprintf(out,
352cd882207SXin LI             "};\n");
353cd882207SXin LI 
354cd882207SXin LI         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
355cd882207SXin LI         fprintf(out,
356cd882207SXin LI             "\n"
357cd882207SXin LI             "#else /* W == 4 */\n"
358cd882207SXin LI             "\n"
359cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
360cd882207SXin LI             "    ");
361cd882207SXin LI         write_table32hi(out, crc_big_table, 256);
362cd882207SXin LI         fprintf(out,
363cd882207SXin LI             "};\n"
364cd882207SXin LI             "\n"
365cd882207SXin LI             "#endif\n");
366cd882207SXin LI 
367cd882207SXin LI         /* write out braid tables for each value of N */
368cd882207SXin LI         for (n = 1; n <= 6; n++) {
369cd882207SXin LI             fprintf(out,
370cd882207SXin LI             "\n"
371cd882207SXin LI             "#if N == %d\n", n);
372cd882207SXin LI 
373cd882207SXin LI             /* compute braid tables for this N and 64-bit word_t */
374cd882207SXin LI             braid(ltl, big, n, 8);
375cd882207SXin LI 
376cd882207SXin LI             /* write out braid tables for 64-bit z_word_t to crc32.h */
377cd882207SXin LI             fprintf(out,
378cd882207SXin LI             "\n"
379cd882207SXin LI             "#if W == 8\n"
380cd882207SXin LI             "\n"
381cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
382cd882207SXin LI             for (k = 0; k < 8; k++) {
383cd882207SXin LI                 fprintf(out, "   {");
384cd882207SXin LI                 write_table(out, ltl[k], 256);
385cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
386c9083b85SXin LI             }
387cd882207SXin LI             fprintf(out,
388cd882207SXin LI             "};\n"
389cd882207SXin LI             "\n"
390cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
391cd882207SXin LI             for (k = 0; k < 8; k++) {
392cd882207SXin LI                 fprintf(out, "   {");
393cd882207SXin LI                 write_table64(out, big[k], 256);
394cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
395cd882207SXin LI             }
396cd882207SXin LI             fprintf(out,
397cd882207SXin LI             "};\n");
398cd882207SXin LI 
399cd882207SXin LI             /* compute braid tables for this N and 32-bit word_t */
400cd882207SXin LI             braid(ltl, big, n, 4);
401cd882207SXin LI 
402cd882207SXin LI             /* write out braid tables for 32-bit z_word_t to crc32.h */
403cd882207SXin LI             fprintf(out,
404cd882207SXin LI             "\n"
405cd882207SXin LI             "#else /* W == 4 */\n"
406cd882207SXin LI             "\n"
407cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
408cd882207SXin LI             for (k = 0; k < 4; k++) {
409cd882207SXin LI                 fprintf(out, "   {");
410cd882207SXin LI                 write_table(out, ltl[k], 256);
411cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
412cd882207SXin LI             }
413cd882207SXin LI             fprintf(out,
414cd882207SXin LI             "};\n"
415cd882207SXin LI             "\n"
416cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
417cd882207SXin LI             for (k = 0; k < 4; k++) {
418cd882207SXin LI                 fprintf(out, "   {");
419cd882207SXin LI                 write_table32hi(out, big[k], 256);
420cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
421cd882207SXin LI             }
422cd882207SXin LI             fprintf(out,
423cd882207SXin LI             "};\n"
424cd882207SXin LI             "\n"
425cd882207SXin LI             "#endif\n"
426cd882207SXin LI             "\n"
427cd882207SXin LI             "#endif\n");
428cd882207SXin LI         }
429cd882207SXin LI         fprintf(out,
430cd882207SXin LI             "\n"
431cd882207SXin LI             "#endif\n");
432cd882207SXin LI 
433cd882207SXin LI         /* write out zeros operator table to crc32.h */
434cd882207SXin LI         fprintf(out,
435cd882207SXin LI             "\n"
436cd882207SXin LI             "local const z_crc_t FAR x2n_table[] = {\n"
437cd882207SXin LI             "    ");
438cd882207SXin LI         write_table(out, x2n_table, 32);
439cd882207SXin LI         fprintf(out,
440cd882207SXin LI             "};\n");
441c9083b85SXin LI         fclose(out);
442c9083b85SXin LI     }
443c9083b85SXin LI #endif /* MAKECRCH */
444c9083b85SXin LI }
445c9083b85SXin LI 
446c9083b85SXin LI #ifdef MAKECRCH
447cd882207SXin LI 
448cd882207SXin LI /*
449cd882207SXin LI    Write the 32-bit values in table[0..k-1] to out, five per line in
450cd882207SXin LI    hexadecimal separated by commas.
451cd882207SXin LI  */
452cd882207SXin LI local void write_table(out, table, k)
453c9083b85SXin LI     FILE *out;
454c9083b85SXin LI     const z_crc_t FAR *table;
455cd882207SXin LI     int k;
456c9083b85SXin LI {
457c9083b85SXin LI     int n;
458c9083b85SXin LI 
459cd882207SXin LI     for (n = 0; n < k; n++)
460cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
461c9083b85SXin LI                 (unsigned long)(table[n]),
462cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
463c9083b85SXin LI }
464cd882207SXin LI 
465cd882207SXin LI /*
466cd882207SXin LI    Write the high 32-bits of each value in table[0..k-1] to out, five per line
467cd882207SXin LI    in hexadecimal separated by commas.
468cd882207SXin LI  */
469cd882207SXin LI local void write_table32hi(out, table, k)
470cd882207SXin LI FILE *out;
471cd882207SXin LI const z_word_t FAR *table;
472cd882207SXin LI int k;
473cd882207SXin LI {
474cd882207SXin LI     int n;
475cd882207SXin LI 
476cd882207SXin LI     for (n = 0; n < k; n++)
477cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
478cd882207SXin LI                 (unsigned long)(table[n] >> 32),
479cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
480cd882207SXin LI }
481cd882207SXin LI 
482cd882207SXin LI /*
483cd882207SXin LI   Write the 64-bit values in table[0..k-1] to out, three per line in
484cd882207SXin LI   hexadecimal separated by commas. This assumes that if there is a 64-bit
485cd882207SXin LI   type, then there is also a long long integer type, and it is at least 64
486cd882207SXin LI   bits. If not, then the type cast and format string can be adjusted
487cd882207SXin LI   accordingly.
488cd882207SXin LI  */
489cd882207SXin LI local void write_table64(out, table, k)
490cd882207SXin LI     FILE *out;
491cd882207SXin LI     const z_word_t FAR *table;
492cd882207SXin LI     int k;
493cd882207SXin LI {
494cd882207SXin LI     int n;
495cd882207SXin LI 
496cd882207SXin LI     for (n = 0; n < k; n++)
497cd882207SXin LI         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
498cd882207SXin LI                 (unsigned long long)(table[n]),
499cd882207SXin LI                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
500cd882207SXin LI }
501cd882207SXin LI 
502cd882207SXin LI /* Actually do the deed. */
503cd882207SXin LI int main()
504cd882207SXin LI {
505cd882207SXin LI     make_crc_table();
506cd882207SXin LI     return 0;
507cd882207SXin LI }
508cd882207SXin LI 
509c9083b85SXin LI #endif /* MAKECRCH */
510c9083b85SXin LI 
511cd882207SXin LI #ifdef W
512cd882207SXin LI /*
513cd882207SXin LI   Generate the little and big-endian braid tables for the given n and z_word_t
514cd882207SXin LI   size w. Each array must have room for w blocks of 256 elements.
515cd882207SXin LI  */
516cd882207SXin LI local void braid(ltl, big, n, w)
517cd882207SXin LI     z_crc_t ltl[][256];
518cd882207SXin LI     z_word_t big[][256];
519cd882207SXin LI     int n;
520cd882207SXin LI     int w;
521cd882207SXin LI {
522cd882207SXin LI     int k;
523cd882207SXin LI     z_crc_t i, p, q;
524cd882207SXin LI     for (k = 0; k < w; k++) {
525cd882207SXin LI         p = x2nmodp((n * w + 3 - k) << 3, 0);
526cd882207SXin LI         ltl[k][0] = 0;
527cd882207SXin LI         big[w - 1 - k][0] = 0;
528cd882207SXin LI         for (i = 1; i < 256; i++) {
529cd882207SXin LI             ltl[k][i] = q = multmodp(i << 24, p);
530cd882207SXin LI             big[w - 1 - k][i] = byte_swap(q);
531cd882207SXin LI         }
532cd882207SXin LI     }
533cd882207SXin LI }
534cd882207SXin LI #endif
535cd882207SXin LI 
536c9083b85SXin LI #else /* !DYNAMIC_CRC_TABLE */
537c9083b85SXin LI /* ========================================================================
538cd882207SXin LI  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
539cd882207SXin LI  * of x for combining CRC-32s, all made by make_crc_table().
540c9083b85SXin LI  */
541c9083b85SXin LI #include "crc32.h"
542c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
543c9083b85SXin LI 
544cd882207SXin LI /* ========================================================================
545cd882207SXin LI  * Routines used for CRC calculation. Some are also required for the table
546cd882207SXin LI  * generation above.
547cd882207SXin LI  */
548cd882207SXin LI 
549cd882207SXin LI /*
550cd882207SXin LI   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
551cd882207SXin LI   reflected. For speed, this requires that a not be zero.
552cd882207SXin LI  */
553cd882207SXin LI local z_crc_t multmodp(a, b)
554cd882207SXin LI     z_crc_t a;
555cd882207SXin LI     z_crc_t b;
556cd882207SXin LI {
557cd882207SXin LI     z_crc_t m, p;
558cd882207SXin LI 
559cd882207SXin LI     m = (z_crc_t)1 << 31;
560cd882207SXin LI     p = 0;
561cd882207SXin LI     for (;;) {
562cd882207SXin LI         if (a & m) {
563cd882207SXin LI             p ^= b;
564cd882207SXin LI             if ((a & (m - 1)) == 0)
565cd882207SXin LI                 break;
566cd882207SXin LI         }
567cd882207SXin LI         m >>= 1;
568cd882207SXin LI         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
569cd882207SXin LI     }
570cd882207SXin LI     return p;
571cd882207SXin LI }
572cd882207SXin LI 
573cd882207SXin LI /*
574cd882207SXin LI   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
575cd882207SXin LI   initialized.
576cd882207SXin LI  */
577cd882207SXin LI local z_crc_t x2nmodp(n, k)
578cd882207SXin LI     z_off64_t n;
579cd882207SXin LI     unsigned k;
580cd882207SXin LI {
581cd882207SXin LI     z_crc_t p;
582cd882207SXin LI 
583cd882207SXin LI     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
584cd882207SXin LI     while (n) {
585cd882207SXin LI         if (n & 1)
586cd882207SXin LI             p = multmodp(x2n_table[k & 31], p);
587cd882207SXin LI         n >>= 1;
588cd882207SXin LI         k++;
589cd882207SXin LI     }
590cd882207SXin LI     return p;
591cd882207SXin LI }
592cd882207SXin LI 
593c9083b85SXin LI /* =========================================================================
594cd882207SXin LI  * This function can be used by asm versions of crc32(), and to force the
595cd882207SXin LI  * generation of the CRC tables in a threaded application.
596c9083b85SXin LI  */
597c9083b85SXin LI const z_crc_t FAR * ZEXPORT get_crc_table()
598c9083b85SXin LI {
599c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
600cd882207SXin LI     once(&made, make_crc_table);
601c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
602c9083b85SXin LI     return (const z_crc_t FAR *)crc_table;
603c9083b85SXin LI }
604c9083b85SXin LI 
605cd882207SXin LI /* =========================================================================
606cd882207SXin LI  * Use ARM machine instructions if available. This will compute the CRC about
607cd882207SXin LI  * ten times faster than the braided calculation. This code does not check for
608cd882207SXin LI  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
609cd882207SXin LI  * only be defined if the compilation specifies an ARM processor architecture
610cd882207SXin LI  * that has the instructions. For example, compiling with -march=armv8.1-a or
611cd882207SXin LI  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
612cd882207SXin LI  * instructions.
613cd882207SXin LI  */
614cd882207SXin LI #ifdef ARMCRC32
615cd882207SXin LI 
616cd882207SXin LI /*
617cd882207SXin LI    Constants empirically determined to maximize speed. These values are from
618cd882207SXin LI    measurements on a Cortex-A57. Your mileage may vary.
619cd882207SXin LI  */
620cd882207SXin LI #define Z_BATCH 3990                /* number of words in a batch */
621cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
622cd882207SXin LI #define Z_BATCH_MIN 800             /* fewest words in a final batch */
623cd882207SXin LI 
624cd882207SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len)
625cd882207SXin LI     unsigned long crc;
626cd882207SXin LI     const unsigned char FAR *buf;
627cd882207SXin LI     z_size_t len;
628cd882207SXin LI {
629cd882207SXin LI     z_crc_t val;
630cd882207SXin LI     z_word_t crc1, crc2;
631cd882207SXin LI     const z_word_t *word;
632cd882207SXin LI     z_word_t val0, val1, val2;
633cd882207SXin LI     z_size_t last, last2, i;
634cd882207SXin LI     z_size_t num;
635cd882207SXin LI 
636cd882207SXin LI     /* Return initial CRC, if requested. */
637cd882207SXin LI     if (buf == Z_NULL) return 0;
638cd882207SXin LI 
639cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
640cd882207SXin LI     once(&made, make_crc_table);
641cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
642cd882207SXin LI 
643cd882207SXin LI     /* Pre-condition the CRC */
644c61bc111SXin LI     crc = (~crc) & 0xffffffff;
645cd882207SXin LI 
646cd882207SXin LI     /* Compute the CRC up to a word boundary. */
647cd882207SXin LI     while (len && ((z_size_t)buf & 7) != 0) {
648cd882207SXin LI         len--;
649cd882207SXin LI         val = *buf++;
650cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
651cd882207SXin LI     }
652cd882207SXin LI 
653cd882207SXin LI     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
654cd882207SXin LI     word = (z_word_t const *)buf;
655cd882207SXin LI     num = len >> 3;
656cd882207SXin LI     len &= 7;
657cd882207SXin LI 
658cd882207SXin LI     /* Do three interleaved CRCs to realize the throughput of one crc32x
659*e37bb444SXin LI        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
660*e37bb444SXin LI        three CRCs are combined into a single CRC after each set of batches. */
661cd882207SXin LI     while (num >= 3 * Z_BATCH) {
662cd882207SXin LI         crc1 = 0;
663cd882207SXin LI         crc2 = 0;
664cd882207SXin LI         for (i = 0; i < Z_BATCH; i++) {
665cd882207SXin LI             val0 = word[i];
666cd882207SXin LI             val1 = word[i + Z_BATCH];
667cd882207SXin LI             val2 = word[i + 2 * Z_BATCH];
668cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
669cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
670cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
671cd882207SXin LI         }
672cd882207SXin LI         word += 3 * Z_BATCH;
673cd882207SXin LI         num -= 3 * Z_BATCH;
674cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
675cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
676cd882207SXin LI     }
677cd882207SXin LI 
678cd882207SXin LI     /* Do one last smaller batch with the remaining words, if there are enough
679cd882207SXin LI        to pay for the combination of CRCs. */
680cd882207SXin LI     last = num / 3;
681cd882207SXin LI     if (last >= Z_BATCH_MIN) {
682cd882207SXin LI         last2 = last << 1;
683cd882207SXin LI         crc1 = 0;
684cd882207SXin LI         crc2 = 0;
685cd882207SXin LI         for (i = 0; i < last; i++) {
686cd882207SXin LI             val0 = word[i];
687cd882207SXin LI             val1 = word[i + last];
688cd882207SXin LI             val2 = word[i + last2];
689cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
690cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
691cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
692cd882207SXin LI         }
693cd882207SXin LI         word += 3 * last;
694cd882207SXin LI         num -= 3 * last;
695cd882207SXin LI         val = x2nmodp(last, 6);
696cd882207SXin LI         crc = multmodp(val, crc) ^ crc1;
697cd882207SXin LI         crc = multmodp(val, crc) ^ crc2;
698cd882207SXin LI     }
699cd882207SXin LI 
700cd882207SXin LI     /* Compute the CRC on any remaining words. */
701cd882207SXin LI     for (i = 0; i < num; i++) {
702cd882207SXin LI         val0 = word[i];
703cd882207SXin LI         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
704cd882207SXin LI     }
705cd882207SXin LI     word += num;
706cd882207SXin LI 
707cd882207SXin LI     /* Complete the CRC on any remaining bytes. */
708cd882207SXin LI     buf = (const unsigned char FAR *)word;
709cd882207SXin LI     while (len) {
710cd882207SXin LI         len--;
711cd882207SXin LI         val = *buf++;
712cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
713cd882207SXin LI     }
714cd882207SXin LI 
715cd882207SXin LI     /* Return the CRC, post-conditioned. */
716cd882207SXin LI     return crc ^ 0xffffffff;
717cd882207SXin LI }
718cd882207SXin LI 
719cd882207SXin LI #else
720cd882207SXin LI 
721cd882207SXin LI #ifdef W
722cd882207SXin LI 
723cd882207SXin LI /*
724cd882207SXin LI   Return the CRC of the W bytes in the word_t data, taking the
725cd882207SXin LI   least-significant byte of the word as the first byte of data, without any pre
726cd882207SXin LI   or post conditioning. This is used to combine the CRCs of each braid.
727cd882207SXin LI  */
7287550e242SJohn Baldwin local z_crc_t crc_word OF((z_word_t data));
7297550e242SJohn Baldwin 
730cd882207SXin LI local z_crc_t crc_word(data)
731cd882207SXin LI     z_word_t data;
732cd882207SXin LI {
733cd882207SXin LI     int k;
734cd882207SXin LI     for (k = 0; k < W; k++)
735cd882207SXin LI         data = (data >> 8) ^ crc_table[data & 0xff];
736cd882207SXin LI     return (z_crc_t)data;
737cd882207SXin LI }
738cd882207SXin LI 
7397550e242SJohn Baldwin local z_word_t crc_word_big OF((z_word_t data));
7407550e242SJohn Baldwin 
741cd882207SXin LI local z_word_t crc_word_big(data)
742cd882207SXin LI     z_word_t data;
743cd882207SXin LI {
744cd882207SXin LI     int k;
745cd882207SXin LI     for (k = 0; k < W; k++)
746cd882207SXin LI         data = (data << 8) ^
747cd882207SXin LI             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
748cd882207SXin LI     return data;
749cd882207SXin LI }
750cd882207SXin LI 
751cd882207SXin LI #endif
752c9083b85SXin LI 
753c9083b85SXin LI /* ========================================================================= */
754c9083b85SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len)
755c9083b85SXin LI     unsigned long crc;
756c9083b85SXin LI     const unsigned char FAR *buf;
757c9083b85SXin LI     z_size_t len;
758c9083b85SXin LI {
759cd882207SXin LI     /* Return initial CRC, if requested. */
760cd882207SXin LI     if (buf == Z_NULL) return 0;
761c9083b85SXin LI 
762c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
763cd882207SXin LI     once(&made, make_crc_table);
764c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
765c9083b85SXin LI 
766cd882207SXin LI     /* Pre-condition the CRC */
767c61bc111SXin LI     crc = (~crc) & 0xffffffff;
768c9083b85SXin LI 
769cd882207SXin LI #ifdef W
770cd882207SXin LI 
771cd882207SXin LI     /* If provided enough bytes, do a braided CRC calculation. */
772cd882207SXin LI     if (len >= N * W + W - 1) {
773cd882207SXin LI         z_size_t blks;
774cd882207SXin LI         z_word_t const *words;
775cd882207SXin LI         unsigned endian;
776cd882207SXin LI         int k;
777cd882207SXin LI 
778cd882207SXin LI         /* Compute the CRC up to a z_word_t boundary. */
779cd882207SXin LI         while (len && ((z_size_t)buf & (W - 1)) != 0) {
780cd882207SXin LI             len--;
781cd882207SXin LI             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
782cd882207SXin LI         }
783cd882207SXin LI 
784cd882207SXin LI         /* Compute the CRC on as many N z_word_t blocks as are available. */
785cd882207SXin LI         blks = len / (N * W);
786cd882207SXin LI         len -= blks * N * W;
787cd882207SXin LI         words = (z_word_t const *)buf;
788cd882207SXin LI 
789cd882207SXin LI         /* Do endian check at execution time instead of compile time, since ARM
790cd882207SXin LI            processors can change the endianess at execution time. If the
791cd882207SXin LI            compiler knows what the endianess will be, it can optimize out the
792cd882207SXin LI            check and the unused branch. */
793c9083b85SXin LI         endian = 1;
794cd882207SXin LI         if (*(unsigned char *)&endian) {
795cd882207SXin LI             /* Little endian. */
796cd882207SXin LI 
797cd882207SXin LI             z_crc_t crc0;
798cd882207SXin LI             z_word_t word0;
799cd882207SXin LI #if N > 1
800cd882207SXin LI             z_crc_t crc1;
801cd882207SXin LI             z_word_t word1;
802cd882207SXin LI #if N > 2
803cd882207SXin LI             z_crc_t crc2;
804cd882207SXin LI             z_word_t word2;
805cd882207SXin LI #if N > 3
806cd882207SXin LI             z_crc_t crc3;
807cd882207SXin LI             z_word_t word3;
808cd882207SXin LI #if N > 4
809cd882207SXin LI             z_crc_t crc4;
810cd882207SXin LI             z_word_t word4;
811cd882207SXin LI #if N > 5
812cd882207SXin LI             z_crc_t crc5;
813cd882207SXin LI             z_word_t word5;
814cd882207SXin LI #endif
815cd882207SXin LI #endif
816cd882207SXin LI #endif
817cd882207SXin LI #endif
818cd882207SXin LI #endif
819cd882207SXin LI 
820cd882207SXin LI             /* Initialize the CRC for each braid. */
821cd882207SXin LI             crc0 = crc;
822cd882207SXin LI #if N > 1
823cd882207SXin LI             crc1 = 0;
824cd882207SXin LI #if N > 2
825cd882207SXin LI             crc2 = 0;
826cd882207SXin LI #if N > 3
827cd882207SXin LI             crc3 = 0;
828cd882207SXin LI #if N > 4
829cd882207SXin LI             crc4 = 0;
830cd882207SXin LI #if N > 5
831cd882207SXin LI             crc5 = 0;
832cd882207SXin LI #endif
833cd882207SXin LI #endif
834cd882207SXin LI #endif
835cd882207SXin LI #endif
836cd882207SXin LI #endif
837cd882207SXin LI 
838cd882207SXin LI             /*
839cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
840cd882207SXin LI               independently.
841cd882207SXin LI              */
842cd882207SXin LI             while (--blks) {
843cd882207SXin LI                 /* Load the word for each braid into registers. */
844cd882207SXin LI                 word0 = crc0 ^ words[0];
845cd882207SXin LI #if N > 1
846cd882207SXin LI                 word1 = crc1 ^ words[1];
847cd882207SXin LI #if N > 2
848cd882207SXin LI                 word2 = crc2 ^ words[2];
849cd882207SXin LI #if N > 3
850cd882207SXin LI                 word3 = crc3 ^ words[3];
851cd882207SXin LI #if N > 4
852cd882207SXin LI                 word4 = crc4 ^ words[4];
853cd882207SXin LI #if N > 5
854cd882207SXin LI                 word5 = crc5 ^ words[5];
855cd882207SXin LI #endif
856cd882207SXin LI #endif
857cd882207SXin LI #endif
858cd882207SXin LI #endif
859cd882207SXin LI #endif
860cd882207SXin LI                 words += N;
861cd882207SXin LI 
862cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
863cd882207SXin LI                    get unrolled. */
864cd882207SXin LI                 crc0 = crc_braid_table[0][word0 & 0xff];
865cd882207SXin LI #if N > 1
866cd882207SXin LI                 crc1 = crc_braid_table[0][word1 & 0xff];
867cd882207SXin LI #if N > 2
868cd882207SXin LI                 crc2 = crc_braid_table[0][word2 & 0xff];
869cd882207SXin LI #if N > 3
870cd882207SXin LI                 crc3 = crc_braid_table[0][word3 & 0xff];
871cd882207SXin LI #if N > 4
872cd882207SXin LI                 crc4 = crc_braid_table[0][word4 & 0xff];
873cd882207SXin LI #if N > 5
874cd882207SXin LI                 crc5 = crc_braid_table[0][word5 & 0xff];
875cd882207SXin LI #endif
876cd882207SXin LI #endif
877cd882207SXin LI #endif
878cd882207SXin LI #endif
879cd882207SXin LI #endif
880cd882207SXin LI                 for (k = 1; k < W; k++) {
881cd882207SXin LI                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
882cd882207SXin LI #if N > 1
883cd882207SXin LI                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
884cd882207SXin LI #if N > 2
885cd882207SXin LI                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
886cd882207SXin LI #if N > 3
887cd882207SXin LI                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
888cd882207SXin LI #if N > 4
889cd882207SXin LI                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
890cd882207SXin LI #if N > 5
891cd882207SXin LI                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
892cd882207SXin LI #endif
893cd882207SXin LI #endif
894cd882207SXin LI #endif
895cd882207SXin LI #endif
896cd882207SXin LI #endif
897c9083b85SXin LI                 }
898cd882207SXin LI             }
899cd882207SXin LI 
900cd882207SXin LI             /*
901cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
902cd882207SXin LI               same time.
903cd882207SXin LI              */
904cd882207SXin LI             crc = crc_word(crc0 ^ words[0]);
905cd882207SXin LI #if N > 1
906cd882207SXin LI             crc = crc_word(crc1 ^ words[1] ^ crc);
907cd882207SXin LI #if N > 2
908cd882207SXin LI             crc = crc_word(crc2 ^ words[2] ^ crc);
909cd882207SXin LI #if N > 3
910cd882207SXin LI             crc = crc_word(crc3 ^ words[3] ^ crc);
911cd882207SXin LI #if N > 4
912cd882207SXin LI             crc = crc_word(crc4 ^ words[4] ^ crc);
913cd882207SXin LI #if N > 5
914cd882207SXin LI             crc = crc_word(crc5 ^ words[5] ^ crc);
915cd882207SXin LI #endif
916cd882207SXin LI #endif
917cd882207SXin LI #endif
918cd882207SXin LI #endif
919cd882207SXin LI #endif
920cd882207SXin LI             words += N;
921cd882207SXin LI         }
922cd882207SXin LI         else {
923cd882207SXin LI             /* Big endian. */
924cd882207SXin LI 
925cd882207SXin LI             z_word_t crc0, word0, comb;
926cd882207SXin LI #if N > 1
927cd882207SXin LI             z_word_t crc1, word1;
928cd882207SXin LI #if N > 2
929cd882207SXin LI             z_word_t crc2, word2;
930cd882207SXin LI #if N > 3
931cd882207SXin LI             z_word_t crc3, word3;
932cd882207SXin LI #if N > 4
933cd882207SXin LI             z_word_t crc4, word4;
934cd882207SXin LI #if N > 5
935cd882207SXin LI             z_word_t crc5, word5;
936cd882207SXin LI #endif
937cd882207SXin LI #endif
938cd882207SXin LI #endif
939cd882207SXin LI #endif
940cd882207SXin LI #endif
941cd882207SXin LI 
942cd882207SXin LI             /* Initialize the CRC for each braid. */
943cd882207SXin LI             crc0 = byte_swap(crc);
944cd882207SXin LI #if N > 1
945cd882207SXin LI             crc1 = 0;
946cd882207SXin LI #if N > 2
947cd882207SXin LI             crc2 = 0;
948cd882207SXin LI #if N > 3
949cd882207SXin LI             crc3 = 0;
950cd882207SXin LI #if N > 4
951cd882207SXin LI             crc4 = 0;
952cd882207SXin LI #if N > 5
953cd882207SXin LI             crc5 = 0;
954cd882207SXin LI #endif
955cd882207SXin LI #endif
956cd882207SXin LI #endif
957cd882207SXin LI #endif
958cd882207SXin LI #endif
959cd882207SXin LI 
960cd882207SXin LI             /*
961cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
962cd882207SXin LI               independently.
963cd882207SXin LI              */
964cd882207SXin LI             while (--blks) {
965cd882207SXin LI                 /* Load the word for each braid into registers. */
966cd882207SXin LI                 word0 = crc0 ^ words[0];
967cd882207SXin LI #if N > 1
968cd882207SXin LI                 word1 = crc1 ^ words[1];
969cd882207SXin LI #if N > 2
970cd882207SXin LI                 word2 = crc2 ^ words[2];
971cd882207SXin LI #if N > 3
972cd882207SXin LI                 word3 = crc3 ^ words[3];
973cd882207SXin LI #if N > 4
974cd882207SXin LI                 word4 = crc4 ^ words[4];
975cd882207SXin LI #if N > 5
976cd882207SXin LI                 word5 = crc5 ^ words[5];
977cd882207SXin LI #endif
978cd882207SXin LI #endif
979cd882207SXin LI #endif
980cd882207SXin LI #endif
981cd882207SXin LI #endif
982cd882207SXin LI                 words += N;
983cd882207SXin LI 
984cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
985cd882207SXin LI                    get unrolled. */
986cd882207SXin LI                 crc0 = crc_braid_big_table[0][word0 & 0xff];
987cd882207SXin LI #if N > 1
988cd882207SXin LI                 crc1 = crc_braid_big_table[0][word1 & 0xff];
989cd882207SXin LI #if N > 2
990cd882207SXin LI                 crc2 = crc_braid_big_table[0][word2 & 0xff];
991cd882207SXin LI #if N > 3
992cd882207SXin LI                 crc3 = crc_braid_big_table[0][word3 & 0xff];
993cd882207SXin LI #if N > 4
994cd882207SXin LI                 crc4 = crc_braid_big_table[0][word4 & 0xff];
995cd882207SXin LI #if N > 5
996cd882207SXin LI                 crc5 = crc_braid_big_table[0][word5 & 0xff];
997cd882207SXin LI #endif
998cd882207SXin LI #endif
999cd882207SXin LI #endif
1000cd882207SXin LI #endif
1001cd882207SXin LI #endif
1002cd882207SXin LI                 for (k = 1; k < W; k++) {
1003cd882207SXin LI                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
1004cd882207SXin LI #if N > 1
1005cd882207SXin LI                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1006cd882207SXin LI #if N > 2
1007cd882207SXin LI                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1008cd882207SXin LI #if N > 3
1009cd882207SXin LI                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1010cd882207SXin LI #if N > 4
1011cd882207SXin LI                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1012cd882207SXin LI #if N > 5
1013cd882207SXin LI                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1014cd882207SXin LI #endif
1015cd882207SXin LI #endif
1016cd882207SXin LI #endif
1017cd882207SXin LI #endif
1018cd882207SXin LI #endif
1019cd882207SXin LI                 }
1020cd882207SXin LI             }
1021cd882207SXin LI 
1022cd882207SXin LI             /*
1023cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
1024cd882207SXin LI               same time.
1025cd882207SXin LI              */
1026cd882207SXin LI             comb = crc_word_big(crc0 ^ words[0]);
1027cd882207SXin LI #if N > 1
1028cd882207SXin LI             comb = crc_word_big(crc1 ^ words[1] ^ comb);
1029cd882207SXin LI #if N > 2
1030cd882207SXin LI             comb = crc_word_big(crc2 ^ words[2] ^ comb);
1031cd882207SXin LI #if N > 3
1032cd882207SXin LI             comb = crc_word_big(crc3 ^ words[3] ^ comb);
1033cd882207SXin LI #if N > 4
1034cd882207SXin LI             comb = crc_word_big(crc4 ^ words[4] ^ comb);
1035cd882207SXin LI #if N > 5
1036cd882207SXin LI             comb = crc_word_big(crc5 ^ words[5] ^ comb);
1037cd882207SXin LI #endif
1038cd882207SXin LI #endif
1039cd882207SXin LI #endif
1040cd882207SXin LI #endif
1041cd882207SXin LI #endif
1042cd882207SXin LI             words += N;
1043cd882207SXin LI             crc = byte_swap(comb);
1044cd882207SXin LI         }
1045cd882207SXin LI 
1046cd882207SXin LI         /*
1047cd882207SXin LI           Update the pointer to the remaining bytes to process.
1048cd882207SXin LI          */
1049cd882207SXin LI         buf = (unsigned char const *)words;
1050cd882207SXin LI     }
1051cd882207SXin LI 
1052cd882207SXin LI #endif /* W */
1053cd882207SXin LI 
1054cd882207SXin LI     /* Complete the computation of the CRC on any remaining bytes. */
1055c9083b85SXin LI     while (len >= 8) {
1056c9083b85SXin LI         len -= 8;
1057cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1060cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1061cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1062cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1064cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1065c9083b85SXin LI     }
1066cd882207SXin LI     while (len) {
1067cd882207SXin LI         len--;
1068cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1069c9083b85SXin LI     }
1070c9083b85SXin LI 
1071cd882207SXin LI     /* Return the CRC, post-conditioned. */
1072cd882207SXin LI     return crc ^ 0xffffffff;
1073cd882207SXin LI }
1074cd882207SXin LI 
1075cd882207SXin LI #endif
1076cd882207SXin LI 
1077c9083b85SXin LI /* ========================================================================= */
1078c9083b85SXin LI unsigned long ZEXPORT crc32(crc, buf, len)
1079c9083b85SXin LI     unsigned long crc;
1080c9083b85SXin LI     const unsigned char FAR *buf;
1081c9083b85SXin LI     uInt len;
1082c9083b85SXin LI {
1083c9083b85SXin LI     return crc32_z(crc, buf, len);
1084c9083b85SXin LI }
1085c9083b85SXin LI 
1086c9083b85SXin LI /* ========================================================================= */
1087cd882207SXin LI uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1088c9083b85SXin LI     uLong crc1;
1089c9083b85SXin LI     uLong crc2;
1090c9083b85SXin LI     z_off64_t len2;
1091c9083b85SXin LI {
1092cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1093cd882207SXin LI     once(&made, make_crc_table);
1094cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1095c61bc111SXin LI     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1096c9083b85SXin LI }
1097c9083b85SXin LI 
1098c9083b85SXin LI /* ========================================================================= */
1099c9083b85SXin LI uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1100c9083b85SXin LI     uLong crc1;
1101c9083b85SXin LI     uLong crc2;
1102c9083b85SXin LI     z_off_t len2;
1103c9083b85SXin LI {
1104*e37bb444SXin LI     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1105c9083b85SXin LI }
1106c9083b85SXin LI 
1107cd882207SXin LI /* ========================================================================= */
1108cd882207SXin LI uLong ZEXPORT crc32_combine_gen64(len2)
1109c9083b85SXin LI     z_off64_t len2;
1110c9083b85SXin LI {
1111cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1112cd882207SXin LI     once(&made, make_crc_table);
1113cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1114cd882207SXin LI     return x2nmodp(len2, 3);
1115cd882207SXin LI }
1116cd882207SXin LI 
1117cd882207SXin LI /* ========================================================================= */
1118cd882207SXin LI uLong ZEXPORT crc32_combine_gen(len2)
1119cd882207SXin LI     z_off_t len2;
1120cd882207SXin LI {
1121*e37bb444SXin LI     return crc32_combine_gen64((z_off64_t)len2);
1122cd882207SXin LI }
1123cd882207SXin LI 
1124cd882207SXin LI /* ========================================================================= */
1125*e37bb444SXin LI uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1126cd882207SXin LI     uLong crc1;
1127cd882207SXin LI     uLong crc2;
1128cd882207SXin LI     uLong op;
1129cd882207SXin LI {
1130c61bc111SXin LI     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1131c9083b85SXin LI }
1132