xref: /freebsd/sys/contrib/zlib/crc32.c (revision c61bc1110c90cb67a60cb08c4f3bc24f75d0b385)
1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream
2cd882207SXin LI  * Copyright (C) 1995-2022 Mark Adler
3c9083b85SXin LI  * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI  *
5cd882207SXin LI  * This interleaved implementation of a CRC makes use of pipelined multiple
6cd882207SXin LI  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7cd882207SXin LI  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8c9083b85SXin LI  */
9c9083b85SXin LI 
10c9083b85SXin LI /* @(#) $Id$ */
11c9083b85SXin LI 
12c9083b85SXin LI /*
13c9083b85SXin LI   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14c9083b85SXin LI   protection on the static variables used to control the first-use generation
15c9083b85SXin LI   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16c9083b85SXin LI   first call get_crc_table() to initialize the tables before allowing more than
17c9083b85SXin LI   one thread to use crc32().
18c9083b85SXin LI 
19cd882207SXin LI   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20cd882207SXin LI   produced, so that this one source file can be compiled to an executable.
21c9083b85SXin LI  */
22c9083b85SXin LI 
23c9083b85SXin LI #ifdef MAKECRCH
24c9083b85SXin LI #  include <stdio.h>
25c9083b85SXin LI #  ifndef DYNAMIC_CRC_TABLE
26c9083b85SXin LI #    define DYNAMIC_CRC_TABLE
27c9083b85SXin LI #  endif /* !DYNAMIC_CRC_TABLE */
28c9083b85SXin LI #endif /* MAKECRCH */
29c9083b85SXin LI 
30cd882207SXin LI #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31c9083b85SXin LI 
32cd882207SXin LI  /*
33cd882207SXin LI   A CRC of a message is computed on N braids of words in the message, where
34cd882207SXin LI   each word consists of W bytes (4 or 8). If N is 3, for example, then three
35cd882207SXin LI   running sparse CRCs are calculated respectively on each braid, at these
36cd882207SXin LI   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37cd882207SXin LI   This is done starting at a word boundary, and continues until as many blocks
38cd882207SXin LI   of N * W bytes as are available have been processed. The results are combined
39cd882207SXin LI   into a single CRC at the end. For this code, N must be in the range 1..6 and
40cd882207SXin LI   W must be 4 or 8. The upper limit on N can be increased if desired by adding
41cd882207SXin LI   more #if blocks, extending the patterns apparent in the code. In addition,
42cd882207SXin LI   crc32.h would need to be regenerated, if the maximum N value is increased.
43cd882207SXin LI 
44cd882207SXin LI   N and W are chosen empirically by benchmarking the execution time on a given
45cd882207SXin LI   processor. The choices for N and W below were based on testing on Intel Kaby
46cd882207SXin LI   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47cd882207SXin LI   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48cd882207SXin LI   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49cd882207SXin LI   They were all tested with either gcc or clang, all using the -O3 optimization
50cd882207SXin LI   level. Your mileage may vary.
51cd882207SXin LI  */
52cd882207SXin LI 
53cd882207SXin LI /* Define N */
54cd882207SXin LI #ifdef Z_TESTN
55cd882207SXin LI #  define N Z_TESTN
56c9083b85SXin LI #else
57cd882207SXin LI #  define N 5
58cd882207SXin LI #endif
59cd882207SXin LI #if N < 1 || N > 6
60cd882207SXin LI #  error N must be in 1..6
61cd882207SXin LI #endif
62c9083b85SXin LI 
63cd882207SXin LI /*
64cd882207SXin LI   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65cd882207SXin LI   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66cd882207SXin LI   that bytes are eight bits.
67cd882207SXin LI  */
68c9083b85SXin LI 
69cd882207SXin LI /*
70cd882207SXin LI   Define W and the associated z_word_t type. If W is not defined, then a
71cd882207SXin LI   braided calculation is not used, and the associated tables and code are not
72cd882207SXin LI   compiled.
73cd882207SXin LI  */
74cd882207SXin LI #ifdef Z_TESTW
75cd882207SXin LI #  if Z_TESTW-1 != -1
76cd882207SXin LI #    define W Z_TESTW
77cd882207SXin LI #  endif
78cd882207SXin LI #else
79cd882207SXin LI #  ifdef MAKECRCH
80cd882207SXin LI #    define W 8         /* required for MAKECRCH */
81cd882207SXin LI #  else
82cd882207SXin LI #    if defined(__x86_64__) || defined(__aarch64__)
83cd882207SXin LI #      define W 8
84cd882207SXin LI #    else
85cd882207SXin LI #      define W 4
86cd882207SXin LI #    endif
87cd882207SXin LI #  endif
88cd882207SXin LI #endif
89cd882207SXin LI #ifdef W
90cd882207SXin LI #  if W == 8 && defined(Z_U8)
91cd882207SXin LI      typedef Z_U8 z_word_t;
92cd882207SXin LI #  elif defined(Z_U4)
93cd882207SXin LI #    undef W
94cd882207SXin LI #    define W 4
95cd882207SXin LI      typedef Z_U4 z_word_t;
96cd882207SXin LI #  else
97cd882207SXin LI #    undef W
98cd882207SXin LI #  endif
99cd882207SXin LI #endif
100cd882207SXin LI 
101cd882207SXin LI /* Local functions. */
102cd882207SXin LI local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103cd882207SXin LI local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104cd882207SXin LI 
105cd882207SXin LI /* If available, use the ARM processor CRC32 instruction. */
106cd882207SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107cd882207SXin LI #  define ARMCRC32
108cd882207SXin LI #endif
109cd882207SXin LI 
110cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111cd882207SXin LI /*
112cd882207SXin LI   Swap the bytes in a z_word_t to convert between little and big endian. Any
113cd882207SXin LI   self-respecting compiler will optimize this to a single machine byte-swap
114cd882207SXin LI   instruction, if one is available. This assumes that word_t is either 32 bits
115cd882207SXin LI   or 64 bits.
116cd882207SXin LI  */
117cd882207SXin LI local z_word_t byte_swap(word)
118cd882207SXin LI     z_word_t word;
119cd882207SXin LI {
120cd882207SXin LI #  if W == 8
121cd882207SXin LI     return
122cd882207SXin LI         (word & 0xff00000000000000) >> 56 |
123cd882207SXin LI         (word & 0xff000000000000) >> 40 |
124cd882207SXin LI         (word & 0xff0000000000) >> 24 |
125cd882207SXin LI         (word & 0xff00000000) >> 8 |
126cd882207SXin LI         (word & 0xff000000) << 8 |
127cd882207SXin LI         (word & 0xff0000) << 24 |
128cd882207SXin LI         (word & 0xff00) << 40 |
129cd882207SXin LI         (word & 0xff) << 56;
130cd882207SXin LI #  else   /* W == 4 */
131cd882207SXin LI     return
132cd882207SXin LI         (word & 0xff000000) >> 24 |
133cd882207SXin LI         (word & 0xff0000) >> 8 |
134cd882207SXin LI         (word & 0xff00) << 8 |
135cd882207SXin LI         (word & 0xff) << 24;
136cd882207SXin LI #  endif
137cd882207SXin LI }
138cd882207SXin LI #endif
139cd882207SXin LI 
140cd882207SXin LI /* CRC polynomial. */
141cd882207SXin LI #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
142c9083b85SXin LI 
143c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
144c9083b85SXin LI 
145cd882207SXin LI local z_crc_t FAR crc_table[256];
146cd882207SXin LI local z_crc_t FAR x2n_table[32];
147c9083b85SXin LI local void make_crc_table OF((void));
148cd882207SXin LI #ifdef W
149cd882207SXin LI    local z_word_t FAR crc_big_table[256];
150cd882207SXin LI    local z_crc_t FAR crc_braid_table[W][256];
151cd882207SXin LI    local z_word_t FAR crc_braid_big_table[W][256];
152cd882207SXin LI    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
153cd882207SXin LI #endif
154c9083b85SXin LI #ifdef MAKECRCH
155cd882207SXin LI    local void write_table OF((FILE *, const z_crc_t FAR *, int));
156cd882207SXin LI    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
157cd882207SXin LI    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
158c9083b85SXin LI #endif /* MAKECRCH */
159cd882207SXin LI 
160cd882207SXin LI /*
161cd882207SXin LI   Define a once() function depending on the availability of atomics. If this is
162cd882207SXin LI   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
163cd882207SXin LI   multiple threads, and if atomics are not available, then get_crc_table() must
164cd882207SXin LI   be called to initialize the tables and must return before any threads are
165cd882207SXin LI   allowed to compute or combine CRCs.
166cd882207SXin LI  */
167cd882207SXin LI 
168cd882207SXin LI /* Definition of once functionality. */
169cd882207SXin LI typedef struct once_s once_t;
170cd882207SXin LI local void once OF((once_t *, void (*)(void)));
171cd882207SXin LI 
172cd882207SXin LI /* Check for the availability of atomics. */
173cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
174cd882207SXin LI     !defined(__STDC_NO_ATOMICS__)
175cd882207SXin LI 
176cd882207SXin LI #include <stdatomic.h>
177cd882207SXin LI 
178cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
179cd882207SXin LI struct once_s {
180cd882207SXin LI     atomic_flag begun;
181cd882207SXin LI     atomic_int done;
182cd882207SXin LI };
183cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
184cd882207SXin LI 
185cd882207SXin LI /*
186cd882207SXin LI   Run the provided init() function exactly once, even if multiple threads
187cd882207SXin LI   invoke once() at the same time. The state must be a once_t initialized with
188cd882207SXin LI   ONCE_INIT.
189cd882207SXin LI  */
190cd882207SXin LI local void once(state, init)
191cd882207SXin LI     once_t *state;
192cd882207SXin LI     void (*init)(void);
193cd882207SXin LI {
194cd882207SXin LI     if (!atomic_load(&state->done)) {
195cd882207SXin LI         if (atomic_flag_test_and_set(&state->begun))
196cd882207SXin LI             while (!atomic_load(&state->done))
197cd882207SXin LI                 ;
198cd882207SXin LI         else {
199cd882207SXin LI             init();
200cd882207SXin LI             atomic_store(&state->done, 1);
201cd882207SXin LI         }
202cd882207SXin LI     }
203cd882207SXin LI }
204cd882207SXin LI 
205cd882207SXin LI #else   /* no atomics */
206cd882207SXin LI 
207cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
208cd882207SXin LI struct once_s {
209cd882207SXin LI     volatile int begun;
210cd882207SXin LI     volatile int done;
211cd882207SXin LI };
212cd882207SXin LI #define ONCE_INIT {0, 0}
213cd882207SXin LI 
214cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of
215cd882207SXin LI    vulnerability. */
216cd882207SXin LI local int test_and_set OF((int volatile *));
217cd882207SXin LI local int test_and_set(flag)
218cd882207SXin LI     int volatile *flag;
219cd882207SXin LI {
220cd882207SXin LI     int was;
221cd882207SXin LI 
222cd882207SXin LI     was = *flag;
223cd882207SXin LI     *flag = 1;
224cd882207SXin LI     return was;
225cd882207SXin LI }
226cd882207SXin LI 
227cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */
228cd882207SXin LI local void once(state, init)
229cd882207SXin LI     once_t *state;
230cd882207SXin LI     void (*init)(void);
231cd882207SXin LI {
232cd882207SXin LI     if (!state->done) {
233cd882207SXin LI         if (test_and_set(&state->begun))
234cd882207SXin LI             while (!state->done)
235cd882207SXin LI                 ;
236cd882207SXin LI         else {
237cd882207SXin LI             init();
238cd882207SXin LI             state->done = 1;
239cd882207SXin LI         }
240cd882207SXin LI     }
241cd882207SXin LI }
242cd882207SXin LI 
243cd882207SXin LI #endif
244cd882207SXin LI 
245cd882207SXin LI /* State for once(). */
246cd882207SXin LI local once_t made = ONCE_INIT;
247cd882207SXin LI 
248c9083b85SXin LI /*
249c9083b85SXin LI   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
250c9083b85SXin LI   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
251c9083b85SXin LI 
252c9083b85SXin LI   Polynomials over GF(2) are represented in binary, one bit per coefficient,
253c9083b85SXin LI   with the lowest powers in the most significant bit. Then adding polynomials
254c9083b85SXin LI   is just exclusive-or, and multiplying a polynomial by x is a right shift by
255c9083b85SXin LI   one. If we call the above polynomial p, and represent a byte as the
256c9083b85SXin LI   polynomial q, also with the lowest power in the most significant bit (so the
257cd882207SXin LI   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
258c9083b85SXin LI   where a mod b means the remainder after dividing a by b.
259c9083b85SXin LI 
260c9083b85SXin LI   This calculation is done using the shift-register method of multiplying and
261c9083b85SXin LI   taking the remainder. The register is initialized to zero, and for each
262c9083b85SXin LI   incoming bit, x^32 is added mod p to the register if the bit is a one (where
263cd882207SXin LI   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
264cd882207SXin LI   (which is shifting right by one and adding x^32 mod p if the bit shifted out
265cd882207SXin LI   is a one). We start with the highest power (least significant bit) of q and
266cd882207SXin LI   repeat for all eight bits of q.
267c9083b85SXin LI 
268cd882207SXin LI   The table is simply the CRC of all possible eight bit values. This is all the
269cd882207SXin LI   information needed to generate CRCs on data a byte at a time for all
270cd882207SXin LI   combinations of CRC register values and incoming bytes.
271c9083b85SXin LI  */
272cd882207SXin LI 
273c9083b85SXin LI local void make_crc_table()
274c9083b85SXin LI {
275cd882207SXin LI     unsigned i, j, n;
276cd882207SXin LI     z_crc_t p;
277c9083b85SXin LI 
278cd882207SXin LI     /* initialize the CRC of bytes tables */
279cd882207SXin LI     for (i = 0; i < 256; i++) {
280cd882207SXin LI         p = i;
281cd882207SXin LI         for (j = 0; j < 8; j++)
282cd882207SXin LI             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
283cd882207SXin LI         crc_table[i] = p;
284cd882207SXin LI #ifdef W
285cd882207SXin LI         crc_big_table[i] = byte_swap(p);
286cd882207SXin LI #endif
287c9083b85SXin LI     }
288c9083b85SXin LI 
289cd882207SXin LI     /* initialize the x^2^n mod p(x) table */
290cd882207SXin LI     p = (z_crc_t)1 << 30;         /* x^1 */
291cd882207SXin LI     x2n_table[0] = p;
292cd882207SXin LI     for (n = 1; n < 32; n++)
293cd882207SXin LI         x2n_table[n] = p = multmodp(p, p);
294c9083b85SXin LI 
295cd882207SXin LI #ifdef W
296cd882207SXin LI     /* initialize the braiding tables -- needs x2n_table[] */
297cd882207SXin LI     braid(crc_braid_table, crc_braid_big_table, N, W);
298cd882207SXin LI #endif
299c9083b85SXin LI 
300c9083b85SXin LI #ifdef MAKECRCH
301c9083b85SXin LI     {
302cd882207SXin LI         /*
303cd882207SXin LI           The crc32.h header file contains tables for both 32-bit and 64-bit
304cd882207SXin LI           z_word_t's, and so requires a 64-bit type be available. In that case,
305cd882207SXin LI           z_word_t must be defined to be 64-bits. This code then also generates
306cd882207SXin LI           and writes out the tables for the case that z_word_t is 32 bits.
307cd882207SXin LI          */
308cd882207SXin LI #if !defined(W) || W != 8
309cd882207SXin LI #  error Need a 64-bit integer type in order to generate crc32.h.
310cd882207SXin LI #endif
311c9083b85SXin LI         FILE *out;
312cd882207SXin LI         int k, n;
313cd882207SXin LI         z_crc_t ltl[8][256];
314cd882207SXin LI         z_word_t big[8][256];
315c9083b85SXin LI 
316c9083b85SXin LI         out = fopen("crc32.h", "w");
317c9083b85SXin LI         if (out == NULL) return;
318cd882207SXin LI 
319cd882207SXin LI         /* write out little-endian CRC table to crc32.h */
320cd882207SXin LI         fprintf(out,
321cd882207SXin LI             "/* crc32.h -- tables for rapid CRC calculation\n"
322cd882207SXin LI             " * Generated automatically by crc32.c\n */\n"
323cd882207SXin LI             "\n"
324cd882207SXin LI             "local const z_crc_t FAR crc_table[] = {\n"
325cd882207SXin LI             "    ");
326cd882207SXin LI         write_table(out, crc_table, 256);
327cd882207SXin LI         fprintf(out,
328cd882207SXin LI             "};\n");
329cd882207SXin LI 
330cd882207SXin LI         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
331cd882207SXin LI         fprintf(out,
332cd882207SXin LI             "\n"
333cd882207SXin LI             "#ifdef W\n"
334cd882207SXin LI             "\n"
335cd882207SXin LI             "#if W == 8\n"
336cd882207SXin LI             "\n"
337cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
338cd882207SXin LI             "    ");
339cd882207SXin LI         write_table64(out, crc_big_table, 256);
340cd882207SXin LI         fprintf(out,
341cd882207SXin LI             "};\n");
342cd882207SXin LI 
343cd882207SXin LI         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
344cd882207SXin LI         fprintf(out,
345cd882207SXin LI             "\n"
346cd882207SXin LI             "#else /* W == 4 */\n"
347cd882207SXin LI             "\n"
348cd882207SXin LI             "local const z_word_t FAR crc_big_table[] = {\n"
349cd882207SXin LI             "    ");
350cd882207SXin LI         write_table32hi(out, crc_big_table, 256);
351cd882207SXin LI         fprintf(out,
352cd882207SXin LI             "};\n"
353cd882207SXin LI             "\n"
354cd882207SXin LI             "#endif\n");
355cd882207SXin LI 
356cd882207SXin LI         /* write out braid tables for each value of N */
357cd882207SXin LI         for (n = 1; n <= 6; n++) {
358cd882207SXin LI             fprintf(out,
359cd882207SXin LI             "\n"
360cd882207SXin LI             "#if N == %d\n", n);
361cd882207SXin LI 
362cd882207SXin LI             /* compute braid tables for this N and 64-bit word_t */
363cd882207SXin LI             braid(ltl, big, n, 8);
364cd882207SXin LI 
365cd882207SXin LI             /* write out braid tables for 64-bit z_word_t to crc32.h */
366cd882207SXin LI             fprintf(out,
367cd882207SXin LI             "\n"
368cd882207SXin LI             "#if W == 8\n"
369cd882207SXin LI             "\n"
370cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
371cd882207SXin LI             for (k = 0; k < 8; k++) {
372cd882207SXin LI                 fprintf(out, "   {");
373cd882207SXin LI                 write_table(out, ltl[k], 256);
374cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
375c9083b85SXin LI             }
376cd882207SXin LI             fprintf(out,
377cd882207SXin LI             "};\n"
378cd882207SXin LI             "\n"
379cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
380cd882207SXin LI             for (k = 0; k < 8; k++) {
381cd882207SXin LI                 fprintf(out, "   {");
382cd882207SXin LI                 write_table64(out, big[k], 256);
383cd882207SXin LI                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
384cd882207SXin LI             }
385cd882207SXin LI             fprintf(out,
386cd882207SXin LI             "};\n");
387cd882207SXin LI 
388cd882207SXin LI             /* compute braid tables for this N and 32-bit word_t */
389cd882207SXin LI             braid(ltl, big, n, 4);
390cd882207SXin LI 
391cd882207SXin LI             /* write out braid tables for 32-bit z_word_t to crc32.h */
392cd882207SXin LI             fprintf(out,
393cd882207SXin LI             "\n"
394cd882207SXin LI             "#else /* W == 4 */\n"
395cd882207SXin LI             "\n"
396cd882207SXin LI             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
397cd882207SXin LI             for (k = 0; k < 4; k++) {
398cd882207SXin LI                 fprintf(out, "   {");
399cd882207SXin LI                 write_table(out, ltl[k], 256);
400cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
401cd882207SXin LI             }
402cd882207SXin LI             fprintf(out,
403cd882207SXin LI             "};\n"
404cd882207SXin LI             "\n"
405cd882207SXin LI             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
406cd882207SXin LI             for (k = 0; k < 4; k++) {
407cd882207SXin LI                 fprintf(out, "   {");
408cd882207SXin LI                 write_table32hi(out, big[k], 256);
409cd882207SXin LI                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
410cd882207SXin LI             }
411cd882207SXin LI             fprintf(out,
412cd882207SXin LI             "};\n"
413cd882207SXin LI             "\n"
414cd882207SXin LI             "#endif\n"
415cd882207SXin LI             "\n"
416cd882207SXin LI             "#endif\n");
417cd882207SXin LI         }
418cd882207SXin LI         fprintf(out,
419cd882207SXin LI             "\n"
420cd882207SXin LI             "#endif\n");
421cd882207SXin LI 
422cd882207SXin LI         /* write out zeros operator table to crc32.h */
423cd882207SXin LI         fprintf(out,
424cd882207SXin LI             "\n"
425cd882207SXin LI             "local const z_crc_t FAR x2n_table[] = {\n"
426cd882207SXin LI             "    ");
427cd882207SXin LI         write_table(out, x2n_table, 32);
428cd882207SXin LI         fprintf(out,
429cd882207SXin LI             "};\n");
430c9083b85SXin LI         fclose(out);
431c9083b85SXin LI     }
432c9083b85SXin LI #endif /* MAKECRCH */
433c9083b85SXin LI }
434c9083b85SXin LI 
435c9083b85SXin LI #ifdef MAKECRCH
436cd882207SXin LI 
437cd882207SXin LI /*
438cd882207SXin LI    Write the 32-bit values in table[0..k-1] to out, five per line in
439cd882207SXin LI    hexadecimal separated by commas.
440cd882207SXin LI  */
441cd882207SXin LI local void write_table(out, table, k)
442c9083b85SXin LI     FILE *out;
443c9083b85SXin LI     const z_crc_t FAR *table;
444cd882207SXin LI     int k;
445c9083b85SXin LI {
446c9083b85SXin LI     int n;
447c9083b85SXin LI 
448cd882207SXin LI     for (n = 0; n < k; n++)
449cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
450c9083b85SXin LI                 (unsigned long)(table[n]),
451cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
452c9083b85SXin LI }
453cd882207SXin LI 
454cd882207SXin LI /*
455cd882207SXin LI    Write the high 32-bits of each value in table[0..k-1] to out, five per line
456cd882207SXin LI    in hexadecimal separated by commas.
457cd882207SXin LI  */
458cd882207SXin LI local void write_table32hi(out, table, k)
459cd882207SXin LI FILE *out;
460cd882207SXin LI const z_word_t FAR *table;
461cd882207SXin LI int k;
462cd882207SXin LI {
463cd882207SXin LI     int n;
464cd882207SXin LI 
465cd882207SXin LI     for (n = 0; n < k; n++)
466cd882207SXin LI         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
467cd882207SXin LI                 (unsigned long)(table[n] >> 32),
468cd882207SXin LI                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
469cd882207SXin LI }
470cd882207SXin LI 
471cd882207SXin LI /*
472cd882207SXin LI   Write the 64-bit values in table[0..k-1] to out, three per line in
473cd882207SXin LI   hexadecimal separated by commas. This assumes that if there is a 64-bit
474cd882207SXin LI   type, then there is also a long long integer type, and it is at least 64
475cd882207SXin LI   bits. If not, then the type cast and format string can be adjusted
476cd882207SXin LI   accordingly.
477cd882207SXin LI  */
478cd882207SXin LI local void write_table64(out, table, k)
479cd882207SXin LI     FILE *out;
480cd882207SXin LI     const z_word_t FAR *table;
481cd882207SXin LI     int k;
482cd882207SXin LI {
483cd882207SXin LI     int n;
484cd882207SXin LI 
485cd882207SXin LI     for (n = 0; n < k; n++)
486cd882207SXin LI         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
487cd882207SXin LI                 (unsigned long long)(table[n]),
488cd882207SXin LI                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
489cd882207SXin LI }
490cd882207SXin LI 
491cd882207SXin LI /* Actually do the deed. */
492cd882207SXin LI int main()
493cd882207SXin LI {
494cd882207SXin LI     make_crc_table();
495cd882207SXin LI     return 0;
496cd882207SXin LI }
497cd882207SXin LI 
498c9083b85SXin LI #endif /* MAKECRCH */
499c9083b85SXin LI 
500cd882207SXin LI #ifdef W
501cd882207SXin LI /*
502cd882207SXin LI   Generate the little and big-endian braid tables for the given n and z_word_t
503cd882207SXin LI   size w. Each array must have room for w blocks of 256 elements.
504cd882207SXin LI  */
505cd882207SXin LI local void braid(ltl, big, n, w)
506cd882207SXin LI     z_crc_t ltl[][256];
507cd882207SXin LI     z_word_t big[][256];
508cd882207SXin LI     int n;
509cd882207SXin LI     int w;
510cd882207SXin LI {
511cd882207SXin LI     int k;
512cd882207SXin LI     z_crc_t i, p, q;
513cd882207SXin LI     for (k = 0; k < w; k++) {
514cd882207SXin LI         p = x2nmodp((n * w + 3 - k) << 3, 0);
515cd882207SXin LI         ltl[k][0] = 0;
516cd882207SXin LI         big[w - 1 - k][0] = 0;
517cd882207SXin LI         for (i = 1; i < 256; i++) {
518cd882207SXin LI             ltl[k][i] = q = multmodp(i << 24, p);
519cd882207SXin LI             big[w - 1 - k][i] = byte_swap(q);
520cd882207SXin LI         }
521cd882207SXin LI     }
522cd882207SXin LI }
523cd882207SXin LI #endif
524cd882207SXin LI 
525c9083b85SXin LI #else /* !DYNAMIC_CRC_TABLE */
526c9083b85SXin LI /* ========================================================================
527cd882207SXin LI  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
528cd882207SXin LI  * of x for combining CRC-32s, all made by make_crc_table().
529c9083b85SXin LI  */
530c9083b85SXin LI #include "crc32.h"
531c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
532c9083b85SXin LI 
533cd882207SXin LI /* ========================================================================
534cd882207SXin LI  * Routines used for CRC calculation. Some are also required for the table
535cd882207SXin LI  * generation above.
536cd882207SXin LI  */
537cd882207SXin LI 
538cd882207SXin LI /*
539cd882207SXin LI   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
540cd882207SXin LI   reflected. For speed, this requires that a not be zero.
541cd882207SXin LI  */
542cd882207SXin LI local z_crc_t multmodp(a, b)
543cd882207SXin LI     z_crc_t a;
544cd882207SXin LI     z_crc_t b;
545cd882207SXin LI {
546cd882207SXin LI     z_crc_t m, p;
547cd882207SXin LI 
548cd882207SXin LI     m = (z_crc_t)1 << 31;
549cd882207SXin LI     p = 0;
550cd882207SXin LI     for (;;) {
551cd882207SXin LI         if (a & m) {
552cd882207SXin LI             p ^= b;
553cd882207SXin LI             if ((a & (m - 1)) == 0)
554cd882207SXin LI                 break;
555cd882207SXin LI         }
556cd882207SXin LI         m >>= 1;
557cd882207SXin LI         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
558cd882207SXin LI     }
559cd882207SXin LI     return p;
560cd882207SXin LI }
561cd882207SXin LI 
562cd882207SXin LI /*
563cd882207SXin LI   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
564cd882207SXin LI   initialized.
565cd882207SXin LI  */
566cd882207SXin LI local z_crc_t x2nmodp(n, k)
567cd882207SXin LI     z_off64_t n;
568cd882207SXin LI     unsigned k;
569cd882207SXin LI {
570cd882207SXin LI     z_crc_t p;
571cd882207SXin LI 
572cd882207SXin LI     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
573cd882207SXin LI     while (n) {
574cd882207SXin LI         if (n & 1)
575cd882207SXin LI             p = multmodp(x2n_table[k & 31], p);
576cd882207SXin LI         n >>= 1;
577cd882207SXin LI         k++;
578cd882207SXin LI     }
579cd882207SXin LI     return p;
580cd882207SXin LI }
581cd882207SXin LI 
582c9083b85SXin LI /* =========================================================================
583cd882207SXin LI  * This function can be used by asm versions of crc32(), and to force the
584cd882207SXin LI  * generation of the CRC tables in a threaded application.
585c9083b85SXin LI  */
586c9083b85SXin LI const z_crc_t FAR * ZEXPORT get_crc_table()
587c9083b85SXin LI {
588c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
589cd882207SXin LI     once(&made, make_crc_table);
590c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
591c9083b85SXin LI     return (const z_crc_t FAR *)crc_table;
592c9083b85SXin LI }
593c9083b85SXin LI 
594cd882207SXin LI /* =========================================================================
595cd882207SXin LI  * Use ARM machine instructions if available. This will compute the CRC about
596cd882207SXin LI  * ten times faster than the braided calculation. This code does not check for
597cd882207SXin LI  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
598cd882207SXin LI  * only be defined if the compilation specifies an ARM processor architecture
599cd882207SXin LI  * that has the instructions. For example, compiling with -march=armv8.1-a or
600cd882207SXin LI  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
601cd882207SXin LI  * instructions.
602cd882207SXin LI  */
603cd882207SXin LI #ifdef ARMCRC32
604cd882207SXin LI 
605cd882207SXin LI /*
606cd882207SXin LI    Constants empirically determined to maximize speed. These values are from
607cd882207SXin LI    measurements on a Cortex-A57. Your mileage may vary.
608cd882207SXin LI  */
609cd882207SXin LI #define Z_BATCH 3990                /* number of words in a batch */
610cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
611cd882207SXin LI #define Z_BATCH_MIN 800             /* fewest words in a final batch */
612cd882207SXin LI 
613cd882207SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len)
614cd882207SXin LI     unsigned long crc;
615cd882207SXin LI     const unsigned char FAR *buf;
616cd882207SXin LI     z_size_t len;
617cd882207SXin LI {
618cd882207SXin LI     z_crc_t val;
619cd882207SXin LI     z_word_t crc1, crc2;
620cd882207SXin LI     const z_word_t *word;
621cd882207SXin LI     z_word_t val0, val1, val2;
622cd882207SXin LI     z_size_t last, last2, i;
623cd882207SXin LI     z_size_t num;
624cd882207SXin LI 
625cd882207SXin LI     /* Return initial CRC, if requested. */
626cd882207SXin LI     if (buf == Z_NULL) return 0;
627cd882207SXin LI 
628cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
629cd882207SXin LI     once(&made, make_crc_table);
630cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
631cd882207SXin LI 
632cd882207SXin LI     /* Pre-condition the CRC */
633*c61bc111SXin LI     crc = (~crc) & 0xffffffff;
634cd882207SXin LI 
635cd882207SXin LI     /* Compute the CRC up to a word boundary. */
636cd882207SXin LI     while (len && ((z_size_t)buf & 7) != 0) {
637cd882207SXin LI         len--;
638cd882207SXin LI         val = *buf++;
639cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
640cd882207SXin LI     }
641cd882207SXin LI 
642cd882207SXin LI     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
643cd882207SXin LI     word = (z_word_t const *)buf;
644cd882207SXin LI     num = len >> 3;
645cd882207SXin LI     len &= 7;
646cd882207SXin LI 
647cd882207SXin LI     /* Do three interleaved CRCs to realize the throughput of one crc32x
648cd882207SXin LI        instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
649cd882207SXin LI        CRCs are combined into a single CRC after each set of batches. */
650cd882207SXin LI     while (num >= 3 * Z_BATCH) {
651cd882207SXin LI         crc1 = 0;
652cd882207SXin LI         crc2 = 0;
653cd882207SXin LI         for (i = 0; i < Z_BATCH; i++) {
654cd882207SXin LI             val0 = word[i];
655cd882207SXin LI             val1 = word[i + Z_BATCH];
656cd882207SXin LI             val2 = word[i + 2 * Z_BATCH];
657cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
658cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
659cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
660cd882207SXin LI         }
661cd882207SXin LI         word += 3 * Z_BATCH;
662cd882207SXin LI         num -= 3 * Z_BATCH;
663cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
664cd882207SXin LI         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
665cd882207SXin LI     }
666cd882207SXin LI 
667cd882207SXin LI     /* Do one last smaller batch with the remaining words, if there are enough
668cd882207SXin LI        to pay for the combination of CRCs. */
669cd882207SXin LI     last = num / 3;
670cd882207SXin LI     if (last >= Z_BATCH_MIN) {
671cd882207SXin LI         last2 = last << 1;
672cd882207SXin LI         crc1 = 0;
673cd882207SXin LI         crc2 = 0;
674cd882207SXin LI         for (i = 0; i < last; i++) {
675cd882207SXin LI             val0 = word[i];
676cd882207SXin LI             val1 = word[i + last];
677cd882207SXin LI             val2 = word[i + last2];
678cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
679cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
680cd882207SXin LI             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
681cd882207SXin LI         }
682cd882207SXin LI         word += 3 * last;
683cd882207SXin LI         num -= 3 * last;
684cd882207SXin LI         val = x2nmodp(last, 6);
685cd882207SXin LI         crc = multmodp(val, crc) ^ crc1;
686cd882207SXin LI         crc = multmodp(val, crc) ^ crc2;
687cd882207SXin LI     }
688cd882207SXin LI 
689cd882207SXin LI     /* Compute the CRC on any remaining words. */
690cd882207SXin LI     for (i = 0; i < num; i++) {
691cd882207SXin LI         val0 = word[i];
692cd882207SXin LI         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
693cd882207SXin LI     }
694cd882207SXin LI     word += num;
695cd882207SXin LI 
696cd882207SXin LI     /* Complete the CRC on any remaining bytes. */
697cd882207SXin LI     buf = (const unsigned char FAR *)word;
698cd882207SXin LI     while (len) {
699cd882207SXin LI         len--;
700cd882207SXin LI         val = *buf++;
701cd882207SXin LI         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
702cd882207SXin LI     }
703cd882207SXin LI 
704cd882207SXin LI     /* Return the CRC, post-conditioned. */
705cd882207SXin LI     return crc ^ 0xffffffff;
706cd882207SXin LI }
707cd882207SXin LI 
708cd882207SXin LI #else
709cd882207SXin LI 
710cd882207SXin LI #ifdef W
711cd882207SXin LI 
712cd882207SXin LI /*
713cd882207SXin LI   Return the CRC of the W bytes in the word_t data, taking the
714cd882207SXin LI   least-significant byte of the word as the first byte of data, without any pre
715cd882207SXin LI   or post conditioning. This is used to combine the CRCs of each braid.
716cd882207SXin LI  */
717cd882207SXin LI local z_crc_t crc_word(data)
718cd882207SXin LI     z_word_t data;
719cd882207SXin LI {
720cd882207SXin LI     int k;
721cd882207SXin LI     for (k = 0; k < W; k++)
722cd882207SXin LI         data = (data >> 8) ^ crc_table[data & 0xff];
723cd882207SXin LI     return (z_crc_t)data;
724cd882207SXin LI }
725cd882207SXin LI 
726cd882207SXin LI local z_word_t crc_word_big(data)
727cd882207SXin LI     z_word_t data;
728cd882207SXin LI {
729cd882207SXin LI     int k;
730cd882207SXin LI     for (k = 0; k < W; k++)
731cd882207SXin LI         data = (data << 8) ^
732cd882207SXin LI             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
733cd882207SXin LI     return data;
734cd882207SXin LI }
735cd882207SXin LI 
736cd882207SXin LI #endif
737c9083b85SXin LI 
738c9083b85SXin LI /* ========================================================================= */
739c9083b85SXin LI unsigned long ZEXPORT crc32_z(crc, buf, len)
740c9083b85SXin LI     unsigned long crc;
741c9083b85SXin LI     const unsigned char FAR *buf;
742c9083b85SXin LI     z_size_t len;
743c9083b85SXin LI {
744cd882207SXin LI     /* Return initial CRC, if requested. */
745cd882207SXin LI     if (buf == Z_NULL) return 0;
746c9083b85SXin LI 
747c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
748cd882207SXin LI     once(&made, make_crc_table);
749c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
750c9083b85SXin LI 
751cd882207SXin LI     /* Pre-condition the CRC */
752*c61bc111SXin LI     crc = (~crc) & 0xffffffff;
753c9083b85SXin LI 
754cd882207SXin LI #ifdef W
755cd882207SXin LI 
756cd882207SXin LI     /* If provided enough bytes, do a braided CRC calculation. */
757cd882207SXin LI     if (len >= N * W + W - 1) {
758cd882207SXin LI         z_size_t blks;
759cd882207SXin LI         z_word_t const *words;
760cd882207SXin LI         unsigned endian;
761cd882207SXin LI         int k;
762cd882207SXin LI 
763cd882207SXin LI         /* Compute the CRC up to a z_word_t boundary. */
764cd882207SXin LI         while (len && ((z_size_t)buf & (W - 1)) != 0) {
765cd882207SXin LI             len--;
766cd882207SXin LI             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
767cd882207SXin LI         }
768cd882207SXin LI 
769cd882207SXin LI         /* Compute the CRC on as many N z_word_t blocks as are available. */
770cd882207SXin LI         blks = len / (N * W);
771cd882207SXin LI         len -= blks * N * W;
772cd882207SXin LI         words = (z_word_t const *)buf;
773cd882207SXin LI 
774cd882207SXin LI         /* Do endian check at execution time instead of compile time, since ARM
775cd882207SXin LI            processors can change the endianess at execution time. If the
776cd882207SXin LI            compiler knows what the endianess will be, it can optimize out the
777cd882207SXin LI            check and the unused branch. */
778c9083b85SXin LI         endian = 1;
779cd882207SXin LI         if (*(unsigned char *)&endian) {
780cd882207SXin LI             /* Little endian. */
781cd882207SXin LI 
782cd882207SXin LI             z_crc_t crc0;
783cd882207SXin LI             z_word_t word0;
784cd882207SXin LI #if N > 1
785cd882207SXin LI             z_crc_t crc1;
786cd882207SXin LI             z_word_t word1;
787cd882207SXin LI #if N > 2
788cd882207SXin LI             z_crc_t crc2;
789cd882207SXin LI             z_word_t word2;
790cd882207SXin LI #if N > 3
791cd882207SXin LI             z_crc_t crc3;
792cd882207SXin LI             z_word_t word3;
793cd882207SXin LI #if N > 4
794cd882207SXin LI             z_crc_t crc4;
795cd882207SXin LI             z_word_t word4;
796cd882207SXin LI #if N > 5
797cd882207SXin LI             z_crc_t crc5;
798cd882207SXin LI             z_word_t word5;
799cd882207SXin LI #endif
800cd882207SXin LI #endif
801cd882207SXin LI #endif
802cd882207SXin LI #endif
803cd882207SXin LI #endif
804cd882207SXin LI 
805cd882207SXin LI             /* Initialize the CRC for each braid. */
806cd882207SXin LI             crc0 = crc;
807cd882207SXin LI #if N > 1
808cd882207SXin LI             crc1 = 0;
809cd882207SXin LI #if N > 2
810cd882207SXin LI             crc2 = 0;
811cd882207SXin LI #if N > 3
812cd882207SXin LI             crc3 = 0;
813cd882207SXin LI #if N > 4
814cd882207SXin LI             crc4 = 0;
815cd882207SXin LI #if N > 5
816cd882207SXin LI             crc5 = 0;
817cd882207SXin LI #endif
818cd882207SXin LI #endif
819cd882207SXin LI #endif
820cd882207SXin LI #endif
821cd882207SXin LI #endif
822cd882207SXin LI 
823cd882207SXin LI             /*
824cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
825cd882207SXin LI               independently.
826cd882207SXin LI              */
827cd882207SXin LI             while (--blks) {
828cd882207SXin LI                 /* Load the word for each braid into registers. */
829cd882207SXin LI                 word0 = crc0 ^ words[0];
830cd882207SXin LI #if N > 1
831cd882207SXin LI                 word1 = crc1 ^ words[1];
832cd882207SXin LI #if N > 2
833cd882207SXin LI                 word2 = crc2 ^ words[2];
834cd882207SXin LI #if N > 3
835cd882207SXin LI                 word3 = crc3 ^ words[3];
836cd882207SXin LI #if N > 4
837cd882207SXin LI                 word4 = crc4 ^ words[4];
838cd882207SXin LI #if N > 5
839cd882207SXin LI                 word5 = crc5 ^ words[5];
840cd882207SXin LI #endif
841cd882207SXin LI #endif
842cd882207SXin LI #endif
843cd882207SXin LI #endif
844cd882207SXin LI #endif
845cd882207SXin LI                 words += N;
846cd882207SXin LI 
847cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
848cd882207SXin LI                    get unrolled. */
849cd882207SXin LI                 crc0 = crc_braid_table[0][word0 & 0xff];
850cd882207SXin LI #if N > 1
851cd882207SXin LI                 crc1 = crc_braid_table[0][word1 & 0xff];
852cd882207SXin LI #if N > 2
853cd882207SXin LI                 crc2 = crc_braid_table[0][word2 & 0xff];
854cd882207SXin LI #if N > 3
855cd882207SXin LI                 crc3 = crc_braid_table[0][word3 & 0xff];
856cd882207SXin LI #if N > 4
857cd882207SXin LI                 crc4 = crc_braid_table[0][word4 & 0xff];
858cd882207SXin LI #if N > 5
859cd882207SXin LI                 crc5 = crc_braid_table[0][word5 & 0xff];
860cd882207SXin LI #endif
861cd882207SXin LI #endif
862cd882207SXin LI #endif
863cd882207SXin LI #endif
864cd882207SXin LI #endif
865cd882207SXin LI                 for (k = 1; k < W; k++) {
866cd882207SXin LI                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
867cd882207SXin LI #if N > 1
868cd882207SXin LI                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
869cd882207SXin LI #if N > 2
870cd882207SXin LI                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
871cd882207SXin LI #if N > 3
872cd882207SXin LI                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
873cd882207SXin LI #if N > 4
874cd882207SXin LI                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
875cd882207SXin LI #if N > 5
876cd882207SXin LI                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
877cd882207SXin LI #endif
878cd882207SXin LI #endif
879cd882207SXin LI #endif
880cd882207SXin LI #endif
881cd882207SXin LI #endif
882c9083b85SXin LI                 }
883cd882207SXin LI             }
884cd882207SXin LI 
885cd882207SXin LI             /*
886cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
887cd882207SXin LI               same time.
888cd882207SXin LI              */
889cd882207SXin LI             crc = crc_word(crc0 ^ words[0]);
890cd882207SXin LI #if N > 1
891cd882207SXin LI             crc = crc_word(crc1 ^ words[1] ^ crc);
892cd882207SXin LI #if N > 2
893cd882207SXin LI             crc = crc_word(crc2 ^ words[2] ^ crc);
894cd882207SXin LI #if N > 3
895cd882207SXin LI             crc = crc_word(crc3 ^ words[3] ^ crc);
896cd882207SXin LI #if N > 4
897cd882207SXin LI             crc = crc_word(crc4 ^ words[4] ^ crc);
898cd882207SXin LI #if N > 5
899cd882207SXin LI             crc = crc_word(crc5 ^ words[5] ^ crc);
900cd882207SXin LI #endif
901cd882207SXin LI #endif
902cd882207SXin LI #endif
903cd882207SXin LI #endif
904cd882207SXin LI #endif
905cd882207SXin LI             words += N;
906cd882207SXin LI         }
907cd882207SXin LI         else {
908cd882207SXin LI             /* Big endian. */
909cd882207SXin LI 
910cd882207SXin LI             z_word_t crc0, word0, comb;
911cd882207SXin LI #if N > 1
912cd882207SXin LI             z_word_t crc1, word1;
913cd882207SXin LI #if N > 2
914cd882207SXin LI             z_word_t crc2, word2;
915cd882207SXin LI #if N > 3
916cd882207SXin LI             z_word_t crc3, word3;
917cd882207SXin LI #if N > 4
918cd882207SXin LI             z_word_t crc4, word4;
919cd882207SXin LI #if N > 5
920cd882207SXin LI             z_word_t crc5, word5;
921cd882207SXin LI #endif
922cd882207SXin LI #endif
923cd882207SXin LI #endif
924cd882207SXin LI #endif
925cd882207SXin LI #endif
926cd882207SXin LI 
927cd882207SXin LI             /* Initialize the CRC for each braid. */
928cd882207SXin LI             crc0 = byte_swap(crc);
929cd882207SXin LI #if N > 1
930cd882207SXin LI             crc1 = 0;
931cd882207SXin LI #if N > 2
932cd882207SXin LI             crc2 = 0;
933cd882207SXin LI #if N > 3
934cd882207SXin LI             crc3 = 0;
935cd882207SXin LI #if N > 4
936cd882207SXin LI             crc4 = 0;
937cd882207SXin LI #if N > 5
938cd882207SXin LI             crc5 = 0;
939cd882207SXin LI #endif
940cd882207SXin LI #endif
941cd882207SXin LI #endif
942cd882207SXin LI #endif
943cd882207SXin LI #endif
944cd882207SXin LI 
945cd882207SXin LI             /*
946cd882207SXin LI               Process the first blks-1 blocks, computing the CRCs on each braid
947cd882207SXin LI               independently.
948cd882207SXin LI              */
949cd882207SXin LI             while (--blks) {
950cd882207SXin LI                 /* Load the word for each braid into registers. */
951cd882207SXin LI                 word0 = crc0 ^ words[0];
952cd882207SXin LI #if N > 1
953cd882207SXin LI                 word1 = crc1 ^ words[1];
954cd882207SXin LI #if N > 2
955cd882207SXin LI                 word2 = crc2 ^ words[2];
956cd882207SXin LI #if N > 3
957cd882207SXin LI                 word3 = crc3 ^ words[3];
958cd882207SXin LI #if N > 4
959cd882207SXin LI                 word4 = crc4 ^ words[4];
960cd882207SXin LI #if N > 5
961cd882207SXin LI                 word5 = crc5 ^ words[5];
962cd882207SXin LI #endif
963cd882207SXin LI #endif
964cd882207SXin LI #endif
965cd882207SXin LI #endif
966cd882207SXin LI #endif
967cd882207SXin LI                 words += N;
968cd882207SXin LI 
969cd882207SXin LI                 /* Compute and update the CRC for each word. The loop should
970cd882207SXin LI                    get unrolled. */
971cd882207SXin LI                 crc0 = crc_braid_big_table[0][word0 & 0xff];
972cd882207SXin LI #if N > 1
973cd882207SXin LI                 crc1 = crc_braid_big_table[0][word1 & 0xff];
974cd882207SXin LI #if N > 2
975cd882207SXin LI                 crc2 = crc_braid_big_table[0][word2 & 0xff];
976cd882207SXin LI #if N > 3
977cd882207SXin LI                 crc3 = crc_braid_big_table[0][word3 & 0xff];
978cd882207SXin LI #if N > 4
979cd882207SXin LI                 crc4 = crc_braid_big_table[0][word4 & 0xff];
980cd882207SXin LI #if N > 5
981cd882207SXin LI                 crc5 = crc_braid_big_table[0][word5 & 0xff];
982cd882207SXin LI #endif
983cd882207SXin LI #endif
984cd882207SXin LI #endif
985cd882207SXin LI #endif
986cd882207SXin LI #endif
987cd882207SXin LI                 for (k = 1; k < W; k++) {
988cd882207SXin LI                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
989cd882207SXin LI #if N > 1
990cd882207SXin LI                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
991cd882207SXin LI #if N > 2
992cd882207SXin LI                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
993cd882207SXin LI #if N > 3
994cd882207SXin LI                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
995cd882207SXin LI #if N > 4
996cd882207SXin LI                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
997cd882207SXin LI #if N > 5
998cd882207SXin LI                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
999cd882207SXin LI #endif
1000cd882207SXin LI #endif
1001cd882207SXin LI #endif
1002cd882207SXin LI #endif
1003cd882207SXin LI #endif
1004cd882207SXin LI                 }
1005cd882207SXin LI             }
1006cd882207SXin LI 
1007cd882207SXin LI             /*
1008cd882207SXin LI               Process the last block, combining the CRCs of the N braids at the
1009cd882207SXin LI               same time.
1010cd882207SXin LI              */
1011cd882207SXin LI             comb = crc_word_big(crc0 ^ words[0]);
1012cd882207SXin LI #if N > 1
1013cd882207SXin LI             comb = crc_word_big(crc1 ^ words[1] ^ comb);
1014cd882207SXin LI #if N > 2
1015cd882207SXin LI             comb = crc_word_big(crc2 ^ words[2] ^ comb);
1016cd882207SXin LI #if N > 3
1017cd882207SXin LI             comb = crc_word_big(crc3 ^ words[3] ^ comb);
1018cd882207SXin LI #if N > 4
1019cd882207SXin LI             comb = crc_word_big(crc4 ^ words[4] ^ comb);
1020cd882207SXin LI #if N > 5
1021cd882207SXin LI             comb = crc_word_big(crc5 ^ words[5] ^ comb);
1022cd882207SXin LI #endif
1023cd882207SXin LI #endif
1024cd882207SXin LI #endif
1025cd882207SXin LI #endif
1026cd882207SXin LI #endif
1027cd882207SXin LI             words += N;
1028cd882207SXin LI             crc = byte_swap(comb);
1029cd882207SXin LI         }
1030cd882207SXin LI 
1031cd882207SXin LI         /*
1032cd882207SXin LI           Update the pointer to the remaining bytes to process.
1033cd882207SXin LI          */
1034cd882207SXin LI         buf = (unsigned char const *)words;
1035cd882207SXin LI     }
1036cd882207SXin LI 
1037cd882207SXin LI #endif /* W */
1038cd882207SXin LI 
1039cd882207SXin LI     /* Complete the computation of the CRC on any remaining bytes. */
1040c9083b85SXin LI     while (len >= 8) {
1041c9083b85SXin LI         len -= 8;
1042cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1050c9083b85SXin LI     }
1051cd882207SXin LI     while (len) {
1052cd882207SXin LI         len--;
1053cd882207SXin LI         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054c9083b85SXin LI     }
1055c9083b85SXin LI 
1056cd882207SXin LI     /* Return the CRC, post-conditioned. */
1057cd882207SXin LI     return crc ^ 0xffffffff;
1058cd882207SXin LI }
1059cd882207SXin LI 
1060cd882207SXin LI #endif
1061cd882207SXin LI 
1062c9083b85SXin LI /* ========================================================================= */
1063c9083b85SXin LI unsigned long ZEXPORT crc32(crc, buf, len)
1064c9083b85SXin LI     unsigned long crc;
1065c9083b85SXin LI     const unsigned char FAR *buf;
1066c9083b85SXin LI     uInt len;
1067c9083b85SXin LI {
1068c9083b85SXin LI     return crc32_z(crc, buf, len);
1069c9083b85SXin LI }
1070c9083b85SXin LI 
1071c9083b85SXin LI /* ========================================================================= */
1072cd882207SXin LI uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1073c9083b85SXin LI     uLong crc1;
1074c9083b85SXin LI     uLong crc2;
1075c9083b85SXin LI     z_off64_t len2;
1076c9083b85SXin LI {
1077cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1078cd882207SXin LI     once(&made, make_crc_table);
1079cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1080*c61bc111SXin LI     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1081c9083b85SXin LI }
1082c9083b85SXin LI 
1083c9083b85SXin LI /* ========================================================================= */
1084c9083b85SXin LI uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1085c9083b85SXin LI     uLong crc1;
1086c9083b85SXin LI     uLong crc2;
1087c9083b85SXin LI     z_off_t len2;
1088c9083b85SXin LI {
1089cd882207SXin LI     return crc32_combine64(crc1, crc2, len2);
1090c9083b85SXin LI }
1091c9083b85SXin LI 
1092cd882207SXin LI /* ========================================================================= */
1093cd882207SXin LI uLong ZEXPORT crc32_combine_gen64(len2)
1094c9083b85SXin LI     z_off64_t len2;
1095c9083b85SXin LI {
1096cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1097cd882207SXin LI     once(&made, make_crc_table);
1098cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1099cd882207SXin LI     return x2nmodp(len2, 3);
1100cd882207SXin LI }
1101cd882207SXin LI 
1102cd882207SXin LI /* ========================================================================= */
1103cd882207SXin LI uLong ZEXPORT crc32_combine_gen(len2)
1104cd882207SXin LI     z_off_t len2;
1105cd882207SXin LI {
1106cd882207SXin LI     return crc32_combine_gen64(len2);
1107cd882207SXin LI }
1108cd882207SXin LI 
1109cd882207SXin LI /* ========================================================================= */
1110cd882207SXin LI uLong crc32_combine_op(crc1, crc2, op)
1111cd882207SXin LI     uLong crc1;
1112cd882207SXin LI     uLong crc2;
1113cd882207SXin LI     uLong op;
1114cd882207SXin LI {
1115*c61bc111SXin LI     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1116c9083b85SXin LI }
1117