1c9083b85SXin LI /* crc32.c -- compute the CRC-32 of a data stream
2cd882207SXin LI * Copyright (C) 1995-2022 Mark Adler
3c9083b85SXin LI * For conditions of distribution and use, see copyright notice in zlib.h
4c9083b85SXin LI *
5cd882207SXin LI * This interleaved implementation of a CRC makes use of pipelined multiple
6cd882207SXin LI * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7cd882207SXin LI * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8c9083b85SXin LI */
9c9083b85SXin LI
10c9083b85SXin LI /* @(#) $Id$ */
11c9083b85SXin LI
12c9083b85SXin LI /*
13c9083b85SXin LI Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14c9083b85SXin LI protection on the static variables used to control the first-use generation
15c9083b85SXin LI of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16c9083b85SXin LI first call get_crc_table() to initialize the tables before allowing more than
17c9083b85SXin LI one thread to use crc32().
18c9083b85SXin LI
19cd882207SXin LI MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20cd882207SXin LI produced, so that this one source file can be compiled to an executable.
21c9083b85SXin LI */
22c9083b85SXin LI
23c9083b85SXin LI #ifdef MAKECRCH
24c9083b85SXin LI # include <stdio.h>
25c9083b85SXin LI # ifndef DYNAMIC_CRC_TABLE
26c9083b85SXin LI # define DYNAMIC_CRC_TABLE
27c9083b85SXin LI # endif /* !DYNAMIC_CRC_TABLE */
28c9083b85SXin LI #endif /* MAKECRCH */
29c9083b85SXin LI
30cd882207SXin LI #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31c9083b85SXin LI
32cd882207SXin LI /*
33cd882207SXin LI A CRC of a message is computed on N braids of words in the message, where
34cd882207SXin LI each word consists of W bytes (4 or 8). If N is 3, for example, then three
35cd882207SXin LI running sparse CRCs are calculated respectively on each braid, at these
36cd882207SXin LI indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37cd882207SXin LI This is done starting at a word boundary, and continues until as many blocks
38cd882207SXin LI of N * W bytes as are available have been processed. The results are combined
39cd882207SXin LI into a single CRC at the end. For this code, N must be in the range 1..6 and
40cd882207SXin LI W must be 4 or 8. The upper limit on N can be increased if desired by adding
41cd882207SXin LI more #if blocks, extending the patterns apparent in the code. In addition,
42cd882207SXin LI crc32.h would need to be regenerated, if the maximum N value is increased.
43cd882207SXin LI
44cd882207SXin LI N and W are chosen empirically by benchmarking the execution time on a given
45cd882207SXin LI processor. The choices for N and W below were based on testing on Intel Kaby
46cd882207SXin LI Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47cd882207SXin LI Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48cd882207SXin LI with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49cd882207SXin LI They were all tested with either gcc or clang, all using the -O3 optimization
50cd882207SXin LI level. Your mileage may vary.
51cd882207SXin LI */
52cd882207SXin LI
53cd882207SXin LI /* Define N */
54cd882207SXin LI #ifdef Z_TESTN
55cd882207SXin LI # define N Z_TESTN
56c9083b85SXin LI #else
57cd882207SXin LI # define N 5
58cd882207SXin LI #endif
59cd882207SXin LI #if N < 1 || N > 6
60cd882207SXin LI # error N must be in 1..6
61cd882207SXin LI #endif
62c9083b85SXin LI
63cd882207SXin LI /*
64cd882207SXin LI z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65cd882207SXin LI z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66cd882207SXin LI that bytes are eight bits.
67cd882207SXin LI */
68c9083b85SXin LI
69cd882207SXin LI /*
70cd882207SXin LI Define W and the associated z_word_t type. If W is not defined, then a
71cd882207SXin LI braided calculation is not used, and the associated tables and code are not
72cd882207SXin LI compiled.
73cd882207SXin LI */
74cd882207SXin LI #ifdef Z_TESTW
75cd882207SXin LI # if Z_TESTW-1 != -1
76cd882207SXin LI # define W Z_TESTW
77cd882207SXin LI # endif
78cd882207SXin LI #else
79cd882207SXin LI # ifdef MAKECRCH
80cd882207SXin LI # define W 8 /* required for MAKECRCH */
81cd882207SXin LI # else
82cd882207SXin LI # if defined(__x86_64__) || defined(__aarch64__)
83cd882207SXin LI # define W 8
84cd882207SXin LI # else
85cd882207SXin LI # define W 4
86cd882207SXin LI # endif
87cd882207SXin LI # endif
88cd882207SXin LI #endif
89cd882207SXin LI #ifdef W
90cd882207SXin LI # if W == 8 && defined(Z_U8)
91cd882207SXin LI typedef Z_U8 z_word_t;
92cd882207SXin LI # elif defined(Z_U4)
93cd882207SXin LI # undef W
94cd882207SXin LI # define W 4
95cd882207SXin LI typedef Z_U4 z_word_t;
96cd882207SXin LI # else
97cd882207SXin LI # undef W
98cd882207SXin LI # endif
99cd882207SXin LI #endif
100cd882207SXin LI
101e37bb444SXin LI /* If available, use the ARM processor CRC32 instruction. */
102e37bb444SXin LI #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103e37bb444SXin LI # define ARMCRC32
104e37bb444SXin LI #endif
105e37bb444SXin LI
106cd882207SXin LI #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
107cd882207SXin LI /*
108cd882207SXin LI Swap the bytes in a z_word_t to convert between little and big endian. Any
109cd882207SXin LI self-respecting compiler will optimize this to a single machine byte-swap
110cd882207SXin LI instruction, if one is available. This assumes that word_t is either 32 bits
111cd882207SXin LI or 64 bits.
112cd882207SXin LI */
byte_swap(z_word_t word)113*4717628eSXin LI local z_word_t byte_swap(z_word_t word) {
114cd882207SXin LI # if W == 8
115cd882207SXin LI return
116cd882207SXin LI (word & 0xff00000000000000) >> 56 |
117cd882207SXin LI (word & 0xff000000000000) >> 40 |
118cd882207SXin LI (word & 0xff0000000000) >> 24 |
119cd882207SXin LI (word & 0xff00000000) >> 8 |
120cd882207SXin LI (word & 0xff000000) << 8 |
121cd882207SXin LI (word & 0xff0000) << 24 |
122cd882207SXin LI (word & 0xff00) << 40 |
123cd882207SXin LI (word & 0xff) << 56;
124cd882207SXin LI # else /* W == 4 */
125cd882207SXin LI return
126cd882207SXin LI (word & 0xff000000) >> 24 |
127cd882207SXin LI (word & 0xff0000) >> 8 |
128cd882207SXin LI (word & 0xff00) << 8 |
129cd882207SXin LI (word & 0xff) << 24;
130cd882207SXin LI # endif
131cd882207SXin LI }
132cd882207SXin LI #endif
133cd882207SXin LI
134*4717628eSXin LI #ifdef DYNAMIC_CRC_TABLE
135*4717628eSXin LI /* =========================================================================
136*4717628eSXin LI * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
137*4717628eSXin LI * below.
138*4717628eSXin LI */
139*4717628eSXin LI local z_crc_t FAR x2n_table[32];
140*4717628eSXin LI #else
141*4717628eSXin LI /* =========================================================================
142*4717628eSXin LI * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
143*4717628eSXin LI * of x for combining CRC-32s, all made by make_crc_table().
144*4717628eSXin LI */
145*4717628eSXin LI # include "crc32.h"
146*4717628eSXin LI #endif
147*4717628eSXin LI
148cd882207SXin LI /* CRC polynomial. */
149cd882207SXin LI #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
150c9083b85SXin LI
151*4717628eSXin LI /*
152*4717628eSXin LI Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
153*4717628eSXin LI reflected. For speed, this requires that a not be zero.
154*4717628eSXin LI */
multmodp(z_crc_t a,z_crc_t b)155*4717628eSXin LI local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
156*4717628eSXin LI z_crc_t m, p;
157c9083b85SXin LI
158*4717628eSXin LI m = (z_crc_t)1 << 31;
159*4717628eSXin LI p = 0;
160*4717628eSXin LI for (;;) {
161*4717628eSXin LI if (a & m) {
162*4717628eSXin LI p ^= b;
163*4717628eSXin LI if ((a & (m - 1)) == 0)
164*4717628eSXin LI break;
165*4717628eSXin LI }
166*4717628eSXin LI m >>= 1;
167*4717628eSXin LI b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
168*4717628eSXin LI }
169*4717628eSXin LI return p;
170*4717628eSXin LI }
171*4717628eSXin LI
172*4717628eSXin LI /*
173*4717628eSXin LI Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
174*4717628eSXin LI initialized.
175*4717628eSXin LI */
x2nmodp(z_off64_t n,unsigned k)176*4717628eSXin LI local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
177*4717628eSXin LI z_crc_t p;
178*4717628eSXin LI
179*4717628eSXin LI p = (z_crc_t)1 << 31; /* x^0 == 1 */
180*4717628eSXin LI while (n) {
181*4717628eSXin LI if (n & 1)
182*4717628eSXin LI p = multmodp(x2n_table[k & 31], p);
183*4717628eSXin LI n >>= 1;
184*4717628eSXin LI k++;
185*4717628eSXin LI }
186*4717628eSXin LI return p;
187*4717628eSXin LI }
188*4717628eSXin LI
189*4717628eSXin LI #ifdef DYNAMIC_CRC_TABLE
190*4717628eSXin LI /* =========================================================================
191*4717628eSXin LI * Build the tables for byte-wise and braided CRC-32 calculations, and a table
192*4717628eSXin LI * of powers of x for combining CRC-32s.
193*4717628eSXin LI */
194cd882207SXin LI local z_crc_t FAR crc_table[256];
195cd882207SXin LI #ifdef W
196cd882207SXin LI local z_word_t FAR crc_big_table[256];
197cd882207SXin LI local z_crc_t FAR crc_braid_table[W][256];
198cd882207SXin LI local z_word_t FAR crc_braid_big_table[W][256];
199*4717628eSXin LI local void braid(z_crc_t [][256], z_word_t [][256], int, int);
200cd882207SXin LI #endif
201c9083b85SXin LI #ifdef MAKECRCH
202*4717628eSXin LI local void write_table(FILE *, const z_crc_t FAR *, int);
203*4717628eSXin LI local void write_table32hi(FILE *, const z_word_t FAR *, int);
204*4717628eSXin LI local void write_table64(FILE *, const z_word_t FAR *, int);
205c9083b85SXin LI #endif /* MAKECRCH */
206cd882207SXin LI
207cd882207SXin LI /*
208cd882207SXin LI Define a once() function depending on the availability of atomics. If this is
209cd882207SXin LI compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
210cd882207SXin LI multiple threads, and if atomics are not available, then get_crc_table() must
211cd882207SXin LI be called to initialize the tables and must return before any threads are
212cd882207SXin LI allowed to compute or combine CRCs.
213cd882207SXin LI */
214cd882207SXin LI
215cd882207SXin LI /* Definition of once functionality. */
216cd882207SXin LI typedef struct once_s once_t;
217cd882207SXin LI
218cd882207SXin LI /* Check for the availability of atomics. */
219cd882207SXin LI #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
220cd882207SXin LI !defined(__STDC_NO_ATOMICS__)
221cd882207SXin LI
222cd882207SXin LI #include <stdatomic.h>
223cd882207SXin LI
224cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
225cd882207SXin LI struct once_s {
226cd882207SXin LI atomic_flag begun;
227cd882207SXin LI atomic_int done;
228cd882207SXin LI };
229cd882207SXin LI #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
230cd882207SXin LI
231cd882207SXin LI /*
232cd882207SXin LI Run the provided init() function exactly once, even if multiple threads
233cd882207SXin LI invoke once() at the same time. The state must be a once_t initialized with
234cd882207SXin LI ONCE_INIT.
235cd882207SXin LI */
once(once_t * state,void (* init)(void))236*4717628eSXin LI local void once(once_t *state, void (*init)(void)) {
237cd882207SXin LI if (!atomic_load(&state->done)) {
238cd882207SXin LI if (atomic_flag_test_and_set(&state->begun))
239cd882207SXin LI while (!atomic_load(&state->done))
240cd882207SXin LI ;
241cd882207SXin LI else {
242cd882207SXin LI init();
243cd882207SXin LI atomic_store(&state->done, 1);
244cd882207SXin LI }
245cd882207SXin LI }
246cd882207SXin LI }
247cd882207SXin LI
248cd882207SXin LI #else /* no atomics */
249cd882207SXin LI
250cd882207SXin LI /* Structure for once(), which must be initialized with ONCE_INIT. */
251cd882207SXin LI struct once_s {
252cd882207SXin LI volatile int begun;
253cd882207SXin LI volatile int done;
254cd882207SXin LI };
255cd882207SXin LI #define ONCE_INIT {0, 0}
256cd882207SXin LI
257cd882207SXin LI /* Test and set. Alas, not atomic, but tries to minimize the period of
258cd882207SXin LI vulnerability. */
test_and_set(int volatile * flag)259*4717628eSXin LI local int test_and_set(int volatile *flag) {
260cd882207SXin LI int was;
261cd882207SXin LI
262cd882207SXin LI was = *flag;
263cd882207SXin LI *flag = 1;
264cd882207SXin LI return was;
265cd882207SXin LI }
266cd882207SXin LI
267cd882207SXin LI /* Run the provided init() function once. This is not thread-safe. */
once(once_t * state,void (* init)(void))268*4717628eSXin LI local void once(once_t *state, void (*init)(void)) {
269cd882207SXin LI if (!state->done) {
270cd882207SXin LI if (test_and_set(&state->begun))
271cd882207SXin LI while (!state->done)
272cd882207SXin LI ;
273cd882207SXin LI else {
274cd882207SXin LI init();
275cd882207SXin LI state->done = 1;
276cd882207SXin LI }
277cd882207SXin LI }
278cd882207SXin LI }
279cd882207SXin LI
280cd882207SXin LI #endif
281cd882207SXin LI
282cd882207SXin LI /* State for once(). */
283cd882207SXin LI local once_t made = ONCE_INIT;
284cd882207SXin LI
285c9083b85SXin LI /*
286c9083b85SXin LI Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
287c9083b85SXin LI x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
288c9083b85SXin LI
289c9083b85SXin LI Polynomials over GF(2) are represented in binary, one bit per coefficient,
290c9083b85SXin LI with the lowest powers in the most significant bit. Then adding polynomials
291c9083b85SXin LI is just exclusive-or, and multiplying a polynomial by x is a right shift by
292c9083b85SXin LI one. If we call the above polynomial p, and represent a byte as the
293c9083b85SXin LI polynomial q, also with the lowest power in the most significant bit (so the
294cd882207SXin LI byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
295c9083b85SXin LI where a mod b means the remainder after dividing a by b.
296c9083b85SXin LI
297c9083b85SXin LI This calculation is done using the shift-register method of multiplying and
298c9083b85SXin LI taking the remainder. The register is initialized to zero, and for each
299c9083b85SXin LI incoming bit, x^32 is added mod p to the register if the bit is a one (where
300cd882207SXin LI x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
301cd882207SXin LI (which is shifting right by one and adding x^32 mod p if the bit shifted out
302cd882207SXin LI is a one). We start with the highest power (least significant bit) of q and
303cd882207SXin LI repeat for all eight bits of q.
304c9083b85SXin LI
305cd882207SXin LI The table is simply the CRC of all possible eight bit values. This is all the
306cd882207SXin LI information needed to generate CRCs on data a byte at a time for all
307cd882207SXin LI combinations of CRC register values and incoming bytes.
308c9083b85SXin LI */
309cd882207SXin LI
make_crc_table(void)310*4717628eSXin LI local void make_crc_table(void) {
311cd882207SXin LI unsigned i, j, n;
312cd882207SXin LI z_crc_t p;
313c9083b85SXin LI
314cd882207SXin LI /* initialize the CRC of bytes tables */
315cd882207SXin LI for (i = 0; i < 256; i++) {
316cd882207SXin LI p = i;
317cd882207SXin LI for (j = 0; j < 8; j++)
318cd882207SXin LI p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
319cd882207SXin LI crc_table[i] = p;
320cd882207SXin LI #ifdef W
321cd882207SXin LI crc_big_table[i] = byte_swap(p);
322cd882207SXin LI #endif
323c9083b85SXin LI }
324c9083b85SXin LI
325cd882207SXin LI /* initialize the x^2^n mod p(x) table */
326cd882207SXin LI p = (z_crc_t)1 << 30; /* x^1 */
327cd882207SXin LI x2n_table[0] = p;
328cd882207SXin LI for (n = 1; n < 32; n++)
329cd882207SXin LI x2n_table[n] = p = multmodp(p, p);
330c9083b85SXin LI
331cd882207SXin LI #ifdef W
332cd882207SXin LI /* initialize the braiding tables -- needs x2n_table[] */
333cd882207SXin LI braid(crc_braid_table, crc_braid_big_table, N, W);
334cd882207SXin LI #endif
335c9083b85SXin LI
336c9083b85SXin LI #ifdef MAKECRCH
337c9083b85SXin LI {
338cd882207SXin LI /*
339cd882207SXin LI The crc32.h header file contains tables for both 32-bit and 64-bit
340cd882207SXin LI z_word_t's, and so requires a 64-bit type be available. In that case,
341cd882207SXin LI z_word_t must be defined to be 64-bits. This code then also generates
342cd882207SXin LI and writes out the tables for the case that z_word_t is 32 bits.
343cd882207SXin LI */
344cd882207SXin LI #if !defined(W) || W != 8
345cd882207SXin LI # error Need a 64-bit integer type in order to generate crc32.h.
346cd882207SXin LI #endif
347c9083b85SXin LI FILE *out;
348cd882207SXin LI int k, n;
349cd882207SXin LI z_crc_t ltl[8][256];
350cd882207SXin LI z_word_t big[8][256];
351c9083b85SXin LI
352c9083b85SXin LI out = fopen("crc32.h", "w");
353c9083b85SXin LI if (out == NULL) return;
354cd882207SXin LI
355cd882207SXin LI /* write out little-endian CRC table to crc32.h */
356cd882207SXin LI fprintf(out,
357cd882207SXin LI "/* crc32.h -- tables for rapid CRC calculation\n"
358cd882207SXin LI " * Generated automatically by crc32.c\n */\n"
359cd882207SXin LI "\n"
360cd882207SXin LI "local const z_crc_t FAR crc_table[] = {\n"
361cd882207SXin LI " ");
362cd882207SXin LI write_table(out, crc_table, 256);
363cd882207SXin LI fprintf(out,
364cd882207SXin LI "};\n");
365cd882207SXin LI
366cd882207SXin LI /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
367cd882207SXin LI fprintf(out,
368cd882207SXin LI "\n"
369cd882207SXin LI "#ifdef W\n"
370cd882207SXin LI "\n"
371cd882207SXin LI "#if W == 8\n"
372cd882207SXin LI "\n"
373cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n"
374cd882207SXin LI " ");
375cd882207SXin LI write_table64(out, crc_big_table, 256);
376cd882207SXin LI fprintf(out,
377cd882207SXin LI "};\n");
378cd882207SXin LI
379cd882207SXin LI /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
380cd882207SXin LI fprintf(out,
381cd882207SXin LI "\n"
382cd882207SXin LI "#else /* W == 4 */\n"
383cd882207SXin LI "\n"
384cd882207SXin LI "local const z_word_t FAR crc_big_table[] = {\n"
385cd882207SXin LI " ");
386cd882207SXin LI write_table32hi(out, crc_big_table, 256);
387cd882207SXin LI fprintf(out,
388cd882207SXin LI "};\n"
389cd882207SXin LI "\n"
390cd882207SXin LI "#endif\n");
391cd882207SXin LI
392cd882207SXin LI /* write out braid tables for each value of N */
393cd882207SXin LI for (n = 1; n <= 6; n++) {
394cd882207SXin LI fprintf(out,
395cd882207SXin LI "\n"
396cd882207SXin LI "#if N == %d\n", n);
397cd882207SXin LI
398cd882207SXin LI /* compute braid tables for this N and 64-bit word_t */
399cd882207SXin LI braid(ltl, big, n, 8);
400cd882207SXin LI
401cd882207SXin LI /* write out braid tables for 64-bit z_word_t to crc32.h */
402cd882207SXin LI fprintf(out,
403cd882207SXin LI "\n"
404cd882207SXin LI "#if W == 8\n"
405cd882207SXin LI "\n"
406cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n");
407cd882207SXin LI for (k = 0; k < 8; k++) {
408cd882207SXin LI fprintf(out, " {");
409cd882207SXin LI write_table(out, ltl[k], 256);
410cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : "");
411c9083b85SXin LI }
412cd882207SXin LI fprintf(out,
413cd882207SXin LI "};\n"
414cd882207SXin LI "\n"
415cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
416cd882207SXin LI for (k = 0; k < 8; k++) {
417cd882207SXin LI fprintf(out, " {");
418cd882207SXin LI write_table64(out, big[k], 256);
419cd882207SXin LI fprintf(out, "}%s", k < 7 ? ",\n" : "");
420cd882207SXin LI }
421cd882207SXin LI fprintf(out,
422cd882207SXin LI "};\n");
423cd882207SXin LI
424cd882207SXin LI /* compute braid tables for this N and 32-bit word_t */
425cd882207SXin LI braid(ltl, big, n, 4);
426cd882207SXin LI
427cd882207SXin LI /* write out braid tables for 32-bit z_word_t to crc32.h */
428cd882207SXin LI fprintf(out,
429cd882207SXin LI "\n"
430cd882207SXin LI "#else /* W == 4 */\n"
431cd882207SXin LI "\n"
432cd882207SXin LI "local const z_crc_t FAR crc_braid_table[][256] = {\n");
433cd882207SXin LI for (k = 0; k < 4; k++) {
434cd882207SXin LI fprintf(out, " {");
435cd882207SXin LI write_table(out, ltl[k], 256);
436cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : "");
437cd882207SXin LI }
438cd882207SXin LI fprintf(out,
439cd882207SXin LI "};\n"
440cd882207SXin LI "\n"
441cd882207SXin LI "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
442cd882207SXin LI for (k = 0; k < 4; k++) {
443cd882207SXin LI fprintf(out, " {");
444cd882207SXin LI write_table32hi(out, big[k], 256);
445cd882207SXin LI fprintf(out, "}%s", k < 3 ? ",\n" : "");
446cd882207SXin LI }
447cd882207SXin LI fprintf(out,
448cd882207SXin LI "};\n"
449cd882207SXin LI "\n"
450cd882207SXin LI "#endif\n"
451cd882207SXin LI "\n"
452cd882207SXin LI "#endif\n");
453cd882207SXin LI }
454cd882207SXin LI fprintf(out,
455cd882207SXin LI "\n"
456cd882207SXin LI "#endif\n");
457cd882207SXin LI
458cd882207SXin LI /* write out zeros operator table to crc32.h */
459cd882207SXin LI fprintf(out,
460cd882207SXin LI "\n"
461cd882207SXin LI "local const z_crc_t FAR x2n_table[] = {\n"
462cd882207SXin LI " ");
463cd882207SXin LI write_table(out, x2n_table, 32);
464cd882207SXin LI fprintf(out,
465cd882207SXin LI "};\n");
466c9083b85SXin LI fclose(out);
467c9083b85SXin LI }
468c9083b85SXin LI #endif /* MAKECRCH */
469c9083b85SXin LI }
470c9083b85SXin LI
471c9083b85SXin LI #ifdef MAKECRCH
472cd882207SXin LI
473cd882207SXin LI /*
474cd882207SXin LI Write the 32-bit values in table[0..k-1] to out, five per line in
475cd882207SXin LI hexadecimal separated by commas.
476cd882207SXin LI */
write_table(FILE * out,const z_crc_t FAR * table,int k)477*4717628eSXin LI local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
478c9083b85SXin LI int n;
479c9083b85SXin LI
480cd882207SXin LI for (n = 0; n < k; n++)
481cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
482c9083b85SXin LI (unsigned long)(table[n]),
483cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
484c9083b85SXin LI }
485cd882207SXin LI
486cd882207SXin LI /*
487cd882207SXin LI Write the high 32-bits of each value in table[0..k-1] to out, five per line
488cd882207SXin LI in hexadecimal separated by commas.
489cd882207SXin LI */
write_table32hi(FILE * out,const z_word_t FAR * table,int k)490*4717628eSXin LI local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
491cd882207SXin LI int n;
492cd882207SXin LI
493cd882207SXin LI for (n = 0; n < k; n++)
494cd882207SXin LI fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
495cd882207SXin LI (unsigned long)(table[n] >> 32),
496cd882207SXin LI n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
497cd882207SXin LI }
498cd882207SXin LI
499cd882207SXin LI /*
500cd882207SXin LI Write the 64-bit values in table[0..k-1] to out, three per line in
501cd882207SXin LI hexadecimal separated by commas. This assumes that if there is a 64-bit
502cd882207SXin LI type, then there is also a long long integer type, and it is at least 64
503cd882207SXin LI bits. If not, then the type cast and format string can be adjusted
504cd882207SXin LI accordingly.
505cd882207SXin LI */
write_table64(FILE * out,const z_word_t FAR * table,int k)506*4717628eSXin LI local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
507cd882207SXin LI int n;
508cd882207SXin LI
509cd882207SXin LI for (n = 0; n < k; n++)
510cd882207SXin LI fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
511cd882207SXin LI (unsigned long long)(table[n]),
512cd882207SXin LI n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
513cd882207SXin LI }
514cd882207SXin LI
515cd882207SXin LI /* Actually do the deed. */
main(void)516*4717628eSXin LI int main(void) {
517cd882207SXin LI make_crc_table();
518cd882207SXin LI return 0;
519cd882207SXin LI }
520cd882207SXin LI
521c9083b85SXin LI #endif /* MAKECRCH */
522c9083b85SXin LI
523cd882207SXin LI #ifdef W
524cd882207SXin LI /*
525cd882207SXin LI Generate the little and big-endian braid tables for the given n and z_word_t
526cd882207SXin LI size w. Each array must have room for w blocks of 256 elements.
527cd882207SXin LI */
braid(z_crc_t ltl[][256],z_word_t big[][256],int n,int w)528*4717628eSXin LI local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
529cd882207SXin LI int k;
530cd882207SXin LI z_crc_t i, p, q;
531cd882207SXin LI for (k = 0; k < w; k++) {
532cd882207SXin LI p = x2nmodp((n * w + 3 - k) << 3, 0);
533cd882207SXin LI ltl[k][0] = 0;
534cd882207SXin LI big[w - 1 - k][0] = 0;
535cd882207SXin LI for (i = 1; i < 256; i++) {
536cd882207SXin LI ltl[k][i] = q = multmodp(i << 24, p);
537cd882207SXin LI big[w - 1 - k][i] = byte_swap(q);
538cd882207SXin LI }
539cd882207SXin LI }
540cd882207SXin LI }
541cd882207SXin LI #endif
542cd882207SXin LI
543c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
544c9083b85SXin LI
545c9083b85SXin LI /* =========================================================================
546cd882207SXin LI * This function can be used by asm versions of crc32(), and to force the
547cd882207SXin LI * generation of the CRC tables in a threaded application.
548c9083b85SXin LI */
get_crc_table(void)549*4717628eSXin LI const z_crc_t FAR * ZEXPORT get_crc_table(void) {
550c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
551cd882207SXin LI once(&made, make_crc_table);
552c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
553c9083b85SXin LI return (const z_crc_t FAR *)crc_table;
554c9083b85SXin LI }
555c9083b85SXin LI
556cd882207SXin LI /* =========================================================================
557cd882207SXin LI * Use ARM machine instructions if available. This will compute the CRC about
558cd882207SXin LI * ten times faster than the braided calculation. This code does not check for
559cd882207SXin LI * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
560cd882207SXin LI * only be defined if the compilation specifies an ARM processor architecture
561cd882207SXin LI * that has the instructions. For example, compiling with -march=armv8.1-a or
562cd882207SXin LI * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
563cd882207SXin LI * instructions.
564cd882207SXin LI */
565cd882207SXin LI #ifdef ARMCRC32
566cd882207SXin LI
567cd882207SXin LI /*
568cd882207SXin LI Constants empirically determined to maximize speed. These values are from
569cd882207SXin LI measurements on a Cortex-A57. Your mileage may vary.
570cd882207SXin LI */
571cd882207SXin LI #define Z_BATCH 3990 /* number of words in a batch */
572cd882207SXin LI #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
573cd882207SXin LI #define Z_BATCH_MIN 800 /* fewest words in a final batch */
574cd882207SXin LI
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)575*4717628eSXin LI unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
576*4717628eSXin LI z_size_t len) {
577cd882207SXin LI z_crc_t val;
578cd882207SXin LI z_word_t crc1, crc2;
579cd882207SXin LI const z_word_t *word;
580cd882207SXin LI z_word_t val0, val1, val2;
581cd882207SXin LI z_size_t last, last2, i;
582cd882207SXin LI z_size_t num;
583cd882207SXin LI
584cd882207SXin LI /* Return initial CRC, if requested. */
585cd882207SXin LI if (buf == Z_NULL) return 0;
586cd882207SXin LI
587cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
588cd882207SXin LI once(&made, make_crc_table);
589cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
590cd882207SXin LI
591cd882207SXin LI /* Pre-condition the CRC */
592c61bc111SXin LI crc = (~crc) & 0xffffffff;
593cd882207SXin LI
594cd882207SXin LI /* Compute the CRC up to a word boundary. */
595cd882207SXin LI while (len && ((z_size_t)buf & 7) != 0) {
596cd882207SXin LI len--;
597cd882207SXin LI val = *buf++;
598cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
599cd882207SXin LI }
600cd882207SXin LI
601cd882207SXin LI /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
602cd882207SXin LI word = (z_word_t const *)buf;
603cd882207SXin LI num = len >> 3;
604cd882207SXin LI len &= 7;
605cd882207SXin LI
606cd882207SXin LI /* Do three interleaved CRCs to realize the throughput of one crc32x
607e37bb444SXin LI instruction per cycle. Each CRC is calculated on Z_BATCH words. The
608e37bb444SXin LI three CRCs are combined into a single CRC after each set of batches. */
609cd882207SXin LI while (num >= 3 * Z_BATCH) {
610cd882207SXin LI crc1 = 0;
611cd882207SXin LI crc2 = 0;
612cd882207SXin LI for (i = 0; i < Z_BATCH; i++) {
613cd882207SXin LI val0 = word[i];
614cd882207SXin LI val1 = word[i + Z_BATCH];
615cd882207SXin LI val2 = word[i + 2 * Z_BATCH];
616cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
617cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
618cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
619cd882207SXin LI }
620cd882207SXin LI word += 3 * Z_BATCH;
621cd882207SXin LI num -= 3 * Z_BATCH;
622cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
623cd882207SXin LI crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
624cd882207SXin LI }
625cd882207SXin LI
626cd882207SXin LI /* Do one last smaller batch with the remaining words, if there are enough
627cd882207SXin LI to pay for the combination of CRCs. */
628cd882207SXin LI last = num / 3;
629cd882207SXin LI if (last >= Z_BATCH_MIN) {
630cd882207SXin LI last2 = last << 1;
631cd882207SXin LI crc1 = 0;
632cd882207SXin LI crc2 = 0;
633cd882207SXin LI for (i = 0; i < last; i++) {
634cd882207SXin LI val0 = word[i];
635cd882207SXin LI val1 = word[i + last];
636cd882207SXin LI val2 = word[i + last2];
637cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
638cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
639cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
640cd882207SXin LI }
641cd882207SXin LI word += 3 * last;
642cd882207SXin LI num -= 3 * last;
643cd882207SXin LI val = x2nmodp(last, 6);
644cd882207SXin LI crc = multmodp(val, crc) ^ crc1;
645cd882207SXin LI crc = multmodp(val, crc) ^ crc2;
646cd882207SXin LI }
647cd882207SXin LI
648cd882207SXin LI /* Compute the CRC on any remaining words. */
649cd882207SXin LI for (i = 0; i < num; i++) {
650cd882207SXin LI val0 = word[i];
651cd882207SXin LI __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
652cd882207SXin LI }
653cd882207SXin LI word += num;
654cd882207SXin LI
655cd882207SXin LI /* Complete the CRC on any remaining bytes. */
656cd882207SXin LI buf = (const unsigned char FAR *)word;
657cd882207SXin LI while (len) {
658cd882207SXin LI len--;
659cd882207SXin LI val = *buf++;
660cd882207SXin LI __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
661cd882207SXin LI }
662cd882207SXin LI
663cd882207SXin LI /* Return the CRC, post-conditioned. */
664cd882207SXin LI return crc ^ 0xffffffff;
665cd882207SXin LI }
666cd882207SXin LI
667cd882207SXin LI #else
668cd882207SXin LI
669cd882207SXin LI #ifdef W
670cd882207SXin LI
671cd882207SXin LI /*
672cd882207SXin LI Return the CRC of the W bytes in the word_t data, taking the
673cd882207SXin LI least-significant byte of the word as the first byte of data, without any pre
674cd882207SXin LI or post conditioning. This is used to combine the CRCs of each braid.
675cd882207SXin LI */
crc_word(z_word_t data)676*4717628eSXin LI local z_crc_t crc_word(z_word_t data) {
677cd882207SXin LI int k;
678cd882207SXin LI for (k = 0; k < W; k++)
679cd882207SXin LI data = (data >> 8) ^ crc_table[data & 0xff];
680cd882207SXin LI return (z_crc_t)data;
681cd882207SXin LI }
682cd882207SXin LI
crc_word_big(z_word_t data)683*4717628eSXin LI local z_word_t crc_word_big(z_word_t data) {
684cd882207SXin LI int k;
685cd882207SXin LI for (k = 0; k < W; k++)
686cd882207SXin LI data = (data << 8) ^
687cd882207SXin LI crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
688cd882207SXin LI return data;
689cd882207SXin LI }
690cd882207SXin LI
691cd882207SXin LI #endif
692c9083b85SXin LI
693c9083b85SXin LI /* ========================================================================= */
crc32_z(unsigned long crc,const unsigned char FAR * buf,z_size_t len)694*4717628eSXin LI unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
695*4717628eSXin LI z_size_t len) {
696cd882207SXin LI /* Return initial CRC, if requested. */
697cd882207SXin LI if (buf == Z_NULL) return 0;
698c9083b85SXin LI
699c9083b85SXin LI #ifdef DYNAMIC_CRC_TABLE
700cd882207SXin LI once(&made, make_crc_table);
701c9083b85SXin LI #endif /* DYNAMIC_CRC_TABLE */
702c9083b85SXin LI
703cd882207SXin LI /* Pre-condition the CRC */
704c61bc111SXin LI crc = (~crc) & 0xffffffff;
705c9083b85SXin LI
706cd882207SXin LI #ifdef W
707cd882207SXin LI
708cd882207SXin LI /* If provided enough bytes, do a braided CRC calculation. */
709cd882207SXin LI if (len >= N * W + W - 1) {
710cd882207SXin LI z_size_t blks;
711cd882207SXin LI z_word_t const *words;
712cd882207SXin LI unsigned endian;
713cd882207SXin LI int k;
714cd882207SXin LI
715cd882207SXin LI /* Compute the CRC up to a z_word_t boundary. */
716cd882207SXin LI while (len && ((z_size_t)buf & (W - 1)) != 0) {
717cd882207SXin LI len--;
718cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
719cd882207SXin LI }
720cd882207SXin LI
721cd882207SXin LI /* Compute the CRC on as many N z_word_t blocks as are available. */
722cd882207SXin LI blks = len / (N * W);
723cd882207SXin LI len -= blks * N * W;
724cd882207SXin LI words = (z_word_t const *)buf;
725cd882207SXin LI
726cd882207SXin LI /* Do endian check at execution time instead of compile time, since ARM
727*4717628eSXin LI processors can change the endianness at execution time. If the
728*4717628eSXin LI compiler knows what the endianness will be, it can optimize out the
729cd882207SXin LI check and the unused branch. */
730c9083b85SXin LI endian = 1;
731cd882207SXin LI if (*(unsigned char *)&endian) {
732cd882207SXin LI /* Little endian. */
733cd882207SXin LI
734cd882207SXin LI z_crc_t crc0;
735cd882207SXin LI z_word_t word0;
736cd882207SXin LI #if N > 1
737cd882207SXin LI z_crc_t crc1;
738cd882207SXin LI z_word_t word1;
739cd882207SXin LI #if N > 2
740cd882207SXin LI z_crc_t crc2;
741cd882207SXin LI z_word_t word2;
742cd882207SXin LI #if N > 3
743cd882207SXin LI z_crc_t crc3;
744cd882207SXin LI z_word_t word3;
745cd882207SXin LI #if N > 4
746cd882207SXin LI z_crc_t crc4;
747cd882207SXin LI z_word_t word4;
748cd882207SXin LI #if N > 5
749cd882207SXin LI z_crc_t crc5;
750cd882207SXin LI z_word_t word5;
751cd882207SXin LI #endif
752cd882207SXin LI #endif
753cd882207SXin LI #endif
754cd882207SXin LI #endif
755cd882207SXin LI #endif
756cd882207SXin LI
757cd882207SXin LI /* Initialize the CRC for each braid. */
758cd882207SXin LI crc0 = crc;
759cd882207SXin LI #if N > 1
760cd882207SXin LI crc1 = 0;
761cd882207SXin LI #if N > 2
762cd882207SXin LI crc2 = 0;
763cd882207SXin LI #if N > 3
764cd882207SXin LI crc3 = 0;
765cd882207SXin LI #if N > 4
766cd882207SXin LI crc4 = 0;
767cd882207SXin LI #if N > 5
768cd882207SXin LI crc5 = 0;
769cd882207SXin LI #endif
770cd882207SXin LI #endif
771cd882207SXin LI #endif
772cd882207SXin LI #endif
773cd882207SXin LI #endif
774cd882207SXin LI
775cd882207SXin LI /*
776cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid
777cd882207SXin LI independently.
778cd882207SXin LI */
779cd882207SXin LI while (--blks) {
780cd882207SXin LI /* Load the word for each braid into registers. */
781cd882207SXin LI word0 = crc0 ^ words[0];
782cd882207SXin LI #if N > 1
783cd882207SXin LI word1 = crc1 ^ words[1];
784cd882207SXin LI #if N > 2
785cd882207SXin LI word2 = crc2 ^ words[2];
786cd882207SXin LI #if N > 3
787cd882207SXin LI word3 = crc3 ^ words[3];
788cd882207SXin LI #if N > 4
789cd882207SXin LI word4 = crc4 ^ words[4];
790cd882207SXin LI #if N > 5
791cd882207SXin LI word5 = crc5 ^ words[5];
792cd882207SXin LI #endif
793cd882207SXin LI #endif
794cd882207SXin LI #endif
795cd882207SXin LI #endif
796cd882207SXin LI #endif
797cd882207SXin LI words += N;
798cd882207SXin LI
799cd882207SXin LI /* Compute and update the CRC for each word. The loop should
800cd882207SXin LI get unrolled. */
801cd882207SXin LI crc0 = crc_braid_table[0][word0 & 0xff];
802cd882207SXin LI #if N > 1
803cd882207SXin LI crc1 = crc_braid_table[0][word1 & 0xff];
804cd882207SXin LI #if N > 2
805cd882207SXin LI crc2 = crc_braid_table[0][word2 & 0xff];
806cd882207SXin LI #if N > 3
807cd882207SXin LI crc3 = crc_braid_table[0][word3 & 0xff];
808cd882207SXin LI #if N > 4
809cd882207SXin LI crc4 = crc_braid_table[0][word4 & 0xff];
810cd882207SXin LI #if N > 5
811cd882207SXin LI crc5 = crc_braid_table[0][word5 & 0xff];
812cd882207SXin LI #endif
813cd882207SXin LI #endif
814cd882207SXin LI #endif
815cd882207SXin LI #endif
816cd882207SXin LI #endif
817cd882207SXin LI for (k = 1; k < W; k++) {
818cd882207SXin LI crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
819cd882207SXin LI #if N > 1
820cd882207SXin LI crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
821cd882207SXin LI #if N > 2
822cd882207SXin LI crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
823cd882207SXin LI #if N > 3
824cd882207SXin LI crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
825cd882207SXin LI #if N > 4
826cd882207SXin LI crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
827cd882207SXin LI #if N > 5
828cd882207SXin LI crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
829cd882207SXin LI #endif
830cd882207SXin LI #endif
831cd882207SXin LI #endif
832cd882207SXin LI #endif
833cd882207SXin LI #endif
834c9083b85SXin LI }
835cd882207SXin LI }
836cd882207SXin LI
837cd882207SXin LI /*
838cd882207SXin LI Process the last block, combining the CRCs of the N braids at the
839cd882207SXin LI same time.
840cd882207SXin LI */
841cd882207SXin LI crc = crc_word(crc0 ^ words[0]);
842cd882207SXin LI #if N > 1
843cd882207SXin LI crc = crc_word(crc1 ^ words[1] ^ crc);
844cd882207SXin LI #if N > 2
845cd882207SXin LI crc = crc_word(crc2 ^ words[2] ^ crc);
846cd882207SXin LI #if N > 3
847cd882207SXin LI crc = crc_word(crc3 ^ words[3] ^ crc);
848cd882207SXin LI #if N > 4
849cd882207SXin LI crc = crc_word(crc4 ^ words[4] ^ crc);
850cd882207SXin LI #if N > 5
851cd882207SXin LI crc = crc_word(crc5 ^ words[5] ^ crc);
852cd882207SXin LI #endif
853cd882207SXin LI #endif
854cd882207SXin LI #endif
855cd882207SXin LI #endif
856cd882207SXin LI #endif
857cd882207SXin LI words += N;
858cd882207SXin LI }
859cd882207SXin LI else {
860cd882207SXin LI /* Big endian. */
861cd882207SXin LI
862cd882207SXin LI z_word_t crc0, word0, comb;
863cd882207SXin LI #if N > 1
864cd882207SXin LI z_word_t crc1, word1;
865cd882207SXin LI #if N > 2
866cd882207SXin LI z_word_t crc2, word2;
867cd882207SXin LI #if N > 3
868cd882207SXin LI z_word_t crc3, word3;
869cd882207SXin LI #if N > 4
870cd882207SXin LI z_word_t crc4, word4;
871cd882207SXin LI #if N > 5
872cd882207SXin LI z_word_t crc5, word5;
873cd882207SXin LI #endif
874cd882207SXin LI #endif
875cd882207SXin LI #endif
876cd882207SXin LI #endif
877cd882207SXin LI #endif
878cd882207SXin LI
879cd882207SXin LI /* Initialize the CRC for each braid. */
880cd882207SXin LI crc0 = byte_swap(crc);
881cd882207SXin LI #if N > 1
882cd882207SXin LI crc1 = 0;
883cd882207SXin LI #if N > 2
884cd882207SXin LI crc2 = 0;
885cd882207SXin LI #if N > 3
886cd882207SXin LI crc3 = 0;
887cd882207SXin LI #if N > 4
888cd882207SXin LI crc4 = 0;
889cd882207SXin LI #if N > 5
890cd882207SXin LI crc5 = 0;
891cd882207SXin LI #endif
892cd882207SXin LI #endif
893cd882207SXin LI #endif
894cd882207SXin LI #endif
895cd882207SXin LI #endif
896cd882207SXin LI
897cd882207SXin LI /*
898cd882207SXin LI Process the first blks-1 blocks, computing the CRCs on each braid
899cd882207SXin LI independently.
900cd882207SXin LI */
901cd882207SXin LI while (--blks) {
902cd882207SXin LI /* Load the word for each braid into registers. */
903cd882207SXin LI word0 = crc0 ^ words[0];
904cd882207SXin LI #if N > 1
905cd882207SXin LI word1 = crc1 ^ words[1];
906cd882207SXin LI #if N > 2
907cd882207SXin LI word2 = crc2 ^ words[2];
908cd882207SXin LI #if N > 3
909cd882207SXin LI word3 = crc3 ^ words[3];
910cd882207SXin LI #if N > 4
911cd882207SXin LI word4 = crc4 ^ words[4];
912cd882207SXin LI #if N > 5
913cd882207SXin LI word5 = crc5 ^ words[5];
914cd882207SXin LI #endif
915cd882207SXin LI #endif
916cd882207SXin LI #endif
917cd882207SXin LI #endif
918cd882207SXin LI #endif
919cd882207SXin LI words += N;
920cd882207SXin LI
921cd882207SXin LI /* Compute and update the CRC for each word. The loop should
922cd882207SXin LI get unrolled. */
923cd882207SXin LI crc0 = crc_braid_big_table[0][word0 & 0xff];
924cd882207SXin LI #if N > 1
925cd882207SXin LI crc1 = crc_braid_big_table[0][word1 & 0xff];
926cd882207SXin LI #if N > 2
927cd882207SXin LI crc2 = crc_braid_big_table[0][word2 & 0xff];
928cd882207SXin LI #if N > 3
929cd882207SXin LI crc3 = crc_braid_big_table[0][word3 & 0xff];
930cd882207SXin LI #if N > 4
931cd882207SXin LI crc4 = crc_braid_big_table[0][word4 & 0xff];
932cd882207SXin LI #if N > 5
933cd882207SXin LI crc5 = crc_braid_big_table[0][word5 & 0xff];
934cd882207SXin LI #endif
935cd882207SXin LI #endif
936cd882207SXin LI #endif
937cd882207SXin LI #endif
938cd882207SXin LI #endif
939cd882207SXin LI for (k = 1; k < W; k++) {
940cd882207SXin LI crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
941cd882207SXin LI #if N > 1
942cd882207SXin LI crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
943cd882207SXin LI #if N > 2
944cd882207SXin LI crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
945cd882207SXin LI #if N > 3
946cd882207SXin LI crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
947cd882207SXin LI #if N > 4
948cd882207SXin LI crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
949cd882207SXin LI #if N > 5
950cd882207SXin LI crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
951cd882207SXin LI #endif
952cd882207SXin LI #endif
953cd882207SXin LI #endif
954cd882207SXin LI #endif
955cd882207SXin LI #endif
956cd882207SXin LI }
957cd882207SXin LI }
958cd882207SXin LI
959cd882207SXin LI /*
960cd882207SXin LI Process the last block, combining the CRCs of the N braids at the
961cd882207SXin LI same time.
962cd882207SXin LI */
963cd882207SXin LI comb = crc_word_big(crc0 ^ words[0]);
964cd882207SXin LI #if N > 1
965cd882207SXin LI comb = crc_word_big(crc1 ^ words[1] ^ comb);
966cd882207SXin LI #if N > 2
967cd882207SXin LI comb = crc_word_big(crc2 ^ words[2] ^ comb);
968cd882207SXin LI #if N > 3
969cd882207SXin LI comb = crc_word_big(crc3 ^ words[3] ^ comb);
970cd882207SXin LI #if N > 4
971cd882207SXin LI comb = crc_word_big(crc4 ^ words[4] ^ comb);
972cd882207SXin LI #if N > 5
973cd882207SXin LI comb = crc_word_big(crc5 ^ words[5] ^ comb);
974cd882207SXin LI #endif
975cd882207SXin LI #endif
976cd882207SXin LI #endif
977cd882207SXin LI #endif
978cd882207SXin LI #endif
979cd882207SXin LI words += N;
980cd882207SXin LI crc = byte_swap(comb);
981cd882207SXin LI }
982cd882207SXin LI
983cd882207SXin LI /*
984cd882207SXin LI Update the pointer to the remaining bytes to process.
985cd882207SXin LI */
986cd882207SXin LI buf = (unsigned char const *)words;
987cd882207SXin LI }
988cd882207SXin LI
989cd882207SXin LI #endif /* W */
990cd882207SXin LI
991cd882207SXin LI /* Complete the computation of the CRC on any remaining bytes. */
992c9083b85SXin LI while (len >= 8) {
993c9083b85SXin LI len -= 8;
994cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
995cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
996cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
997cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
998cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
999cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1000cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1001cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1002c9083b85SXin LI }
1003cd882207SXin LI while (len) {
1004cd882207SXin LI len--;
1005cd882207SXin LI crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1006c9083b85SXin LI }
1007c9083b85SXin LI
1008cd882207SXin LI /* Return the CRC, post-conditioned. */
1009cd882207SXin LI return crc ^ 0xffffffff;
1010cd882207SXin LI }
1011cd882207SXin LI
1012cd882207SXin LI #endif
1013cd882207SXin LI
1014c9083b85SXin LI /* ========================================================================= */
crc32(unsigned long crc,const unsigned char FAR * buf,uInt len)1015*4717628eSXin LI unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1016*4717628eSXin LI uInt len) {
1017c9083b85SXin LI return crc32_z(crc, buf, len);
1018c9083b85SXin LI }
1019c9083b85SXin LI
1020c9083b85SXin LI /* ========================================================================= */
crc32_combine64(uLong crc1,uLong crc2,z_off64_t len2)1021*4717628eSXin LI uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
1022cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1023cd882207SXin LI once(&made, make_crc_table);
1024cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1025c61bc111SXin LI return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1026c9083b85SXin LI }
1027c9083b85SXin LI
1028c9083b85SXin LI /* ========================================================================= */
crc32_combine(uLong crc1,uLong crc2,z_off_t len2)1029*4717628eSXin LI uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
1030e37bb444SXin LI return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1031c9083b85SXin LI }
1032c9083b85SXin LI
1033cd882207SXin LI /* ========================================================================= */
crc32_combine_gen64(z_off64_t len2)1034*4717628eSXin LI uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
1035cd882207SXin LI #ifdef DYNAMIC_CRC_TABLE
1036cd882207SXin LI once(&made, make_crc_table);
1037cd882207SXin LI #endif /* DYNAMIC_CRC_TABLE */
1038cd882207SXin LI return x2nmodp(len2, 3);
1039cd882207SXin LI }
1040cd882207SXin LI
1041cd882207SXin LI /* ========================================================================= */
crc32_combine_gen(z_off_t len2)1042*4717628eSXin LI uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
1043e37bb444SXin LI return crc32_combine_gen64((z_off64_t)len2);
1044cd882207SXin LI }
1045cd882207SXin LI
1046cd882207SXin LI /* ========================================================================= */
crc32_combine_op(uLong crc1,uLong crc2,uLong op)1047*4717628eSXin LI uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
1048c61bc111SXin LI return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1049c9083b85SXin LI }
1050