Lines Matching +full:8 +full:- +full:n +full:- +full:1
1 /* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2022 Mark Adler
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
14 protection on the static variables used to control the first-use generation
33 A CRC of a message is computed on N braids of words in the message, where
34 each word consists of W bytes (4 or 8). If N is 3, for example, then three
36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
38 of N * W bytes as are available have been processed. The results are combined
39 into a single CRC at the end. For this code, N must be in the range 1..6 and
40 W must be 4 or 8. The upper limit on N can be increased if desired by adding
42 crc32.h would need to be regenerated, if the maximum N value is increased.
44 N and W are chosen empirically by benchmarking the execution time on a given
45 processor. The choices for N and W below were based on testing on Intel Kaby
46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49 They were all tested with either gcc or clang, all using the -O3 optimization
53 /* Define N */
55 # define N Z_TESTN macro
57 # define N 5 macro
59 #if N < 1 || N > 6
60 # error N must be in 1..6
75 # if Z_TESTW-1 != -1
80 # define W 8 /* required for MAKECRCH */
83 # define W 8
90 # if W == 8 && defined(Z_U8)
102 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
109 self-respecting compiler will optimize this to a single machine byte-swap
114 # if W == 8 in byte_swap()
119 (word & 0xff00000000) >> 8 | in byte_swap()
120 (word & 0xff000000) << 8 | in byte_swap()
127 (word & 0xff0000) >> 8 | in byte_swap()
128 (word & 0xff00) << 8 | in byte_swap()
136 * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
142 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
143 * of x for combining CRC-32s, all made by make_crc_table().
158 m = (z_crc_t)1 << 31; in multmodp()
163 if ((a & (m - 1)) == 0) in multmodp()
166 m >>= 1; in multmodp()
167 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; in multmodp()
173 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
176 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { in x2nmodp() argument
179 p = (z_crc_t)1 << 31; /* x^0 == 1 */ in x2nmodp()
180 while (n) { in x2nmodp()
181 if (n & 1) in x2nmodp()
183 n >>= 1; in x2nmodp()
191 * Build the tables for byte-wise and braided CRC-32 calculations, and a table
192 * of powers of x for combining CRC-32s.
237 if (!atomic_load(&state->done)) { in once()
238 if (atomic_flag_test_and_set(&state->begun)) in once()
239 while (!atomic_load(&state->done)) in once()
243 atomic_store(&state->done, 1); in once()
263 *flag = 1; in test_and_set()
267 /* Run the provided init() function once. This is not thread-safe. */
269 if (!state->done) { in once()
270 if (test_and_set(&state->begun)) in once()
271 while (!state->done) in once()
275 state->done = 1; in once()
286 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
287 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
291 is just exclusive-or, and multiplying a polynomial by x is a right shift by
294 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
297 This calculation is done using the shift-register method of multiplying and
300 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
311 unsigned i, j, n; in make_crc_table() local
317 for (j = 0; j < 8; j++) in make_crc_table()
318 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; in make_crc_table()
325 /* initialize the x^2^n mod p(x) table */ in make_crc_table()
326 p = (z_crc_t)1 << 30; /* x^1 */ in make_crc_table()
328 for (n = 1; n < 32; n++) in make_crc_table()
329 x2n_table[n] = p = multmodp(p, p); in make_crc_table()
332 /* initialize the braiding tables -- needs x2n_table[] */ in make_crc_table()
333 braid(crc_braid_table, crc_braid_big_table, N, W); in make_crc_table()
339 The crc32.h header file contains tables for both 32-bit and 64-bit in make_crc_table()
340 z_word_t's, and so requires a 64-bit type be available. In that case, in make_crc_table()
341 z_word_t must be defined to be 64-bits. This code then also generates in make_crc_table()
344 #if !defined(W) || W != 8 in make_crc_table()
345 # error Need a 64-bit integer type in order to generate crc32.h. in make_crc_table()
348 int k, n; in make_crc_table() local
349 z_crc_t ltl[8][256]; in make_crc_table()
350 z_word_t big[8][256]; in make_crc_table()
355 /* write out little-endian CRC table to crc32.h */ in make_crc_table()
357 "/* crc32.h -- tables for rapid CRC calculation\n" in make_crc_table()
358 " * Generated automatically by crc32.c\n */\n" in make_crc_table()
359 "\n" in make_crc_table()
360 "local const z_crc_t FAR crc_table[] = {\n" in make_crc_table()
364 "};\n"); in make_crc_table()
366 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ in make_crc_table()
368 "\n" in make_crc_table()
369 "#ifdef W\n" in make_crc_table()
370 "\n" in make_crc_table()
371 "#if W == 8\n" in make_crc_table()
372 "\n" in make_crc_table()
373 "local const z_word_t FAR crc_big_table[] = {\n" in make_crc_table()
377 "};\n"); in make_crc_table()
379 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ in make_crc_table()
381 "\n" in make_crc_table()
382 "#else /* W == 4 */\n" in make_crc_table()
383 "\n" in make_crc_table()
384 "local const z_word_t FAR crc_big_table[] = {\n" in make_crc_table()
388 "};\n" in make_crc_table()
389 "\n" in make_crc_table()
390 "#endif\n"); in make_crc_table()
392 /* write out braid tables for each value of N */ in make_crc_table()
393 for (n = 1; n <= 6; n++) { in make_crc_table()
395 "\n" in make_crc_table()
396 "#if N == %d\n", n); in make_crc_table()
398 /* compute braid tables for this N and 64-bit word_t */ in make_crc_table()
399 braid(ltl, big, n, 8); in make_crc_table()
401 /* write out braid tables for 64-bit z_word_t to crc32.h */ in make_crc_table()
403 "\n" in make_crc_table()
404 "#if W == 8\n" in make_crc_table()
405 "\n" in make_crc_table()
406 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); in make_crc_table()
407 for (k = 0; k < 8; k++) { in make_crc_table()
410 fprintf(out, "}%s", k < 7 ? ",\n" : ""); in make_crc_table()
413 "};\n" in make_crc_table()
414 "\n" in make_crc_table()
415 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); in make_crc_table()
416 for (k = 0; k < 8; k++) { in make_crc_table()
419 fprintf(out, "}%s", k < 7 ? ",\n" : ""); in make_crc_table()
422 "};\n"); in make_crc_table()
424 /* compute braid tables for this N and 32-bit word_t */ in make_crc_table()
425 braid(ltl, big, n, 4); in make_crc_table()
427 /* write out braid tables for 32-bit z_word_t to crc32.h */ in make_crc_table()
429 "\n" in make_crc_table()
430 "#else /* W == 4 */\n" in make_crc_table()
431 "\n" in make_crc_table()
432 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); in make_crc_table()
436 fprintf(out, "}%s", k < 3 ? ",\n" : ""); in make_crc_table()
439 "};\n" in make_crc_table()
440 "\n" in make_crc_table()
441 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); in make_crc_table()
445 fprintf(out, "}%s", k < 3 ? ",\n" : ""); in make_crc_table()
448 "};\n" in make_crc_table()
449 "\n" in make_crc_table()
450 "#endif\n" in make_crc_table()
451 "\n" in make_crc_table()
452 "#endif\n"); in make_crc_table()
455 "\n" in make_crc_table()
456 "#endif\n"); in make_crc_table()
460 "\n" in make_crc_table()
461 "local const z_crc_t FAR x2n_table[] = {\n" in make_crc_table()
465 "};\n"); in make_crc_table()
474 Write the 32-bit values in table[0..k-1] to out, five per line in
478 int n; in write_table() local
480 for (n = 0; n < k; n++) in write_table()
481 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", in write_table()
482 (unsigned long)(table[n]), in write_table()
483 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); in write_table()
487 Write the high 32-bits of each value in table[0..k-1] to out, five per line
491 int n; in write_table32hi() local
493 for (n = 0; n < k; n++) in write_table32hi()
494 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", in write_table32hi()
495 (unsigned long)(table[n] >> 32), in write_table32hi()
496 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); in write_table32hi()
500 Write the 64-bit values in table[0..k-1] to out, three per line in
501 hexadecimal separated by commas. This assumes that if there is a 64-bit
507 int n; in write_table64() local
509 for (n = 0; n < k; n++) in write_table64()
510 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", in write_table64()
511 (unsigned long long)(table[n]), in write_table64()
512 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); in write_table64()
525 Generate the little and big-endian braid tables for the given n and z_word_t
528 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { in braid() argument
532 p = x2nmodp((n * w + 3 - k) << 3, 0); in braid()
534 big[w - 1 - k][0] = 0; in braid()
535 for (i = 1; i < 256; i++) { in braid()
537 big[w - 1 - k][i] = byte_swap(q); in braid()
561 * that has the instructions. For example, compiling with -march=armv8.1-a or
562 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
569 measurements on a Cortex-A57. Your mileage may vary.
591 /* Pre-condition the CRC */ in crc32_z()
596 len--; in crc32_z()
601 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ in crc32_z()
621 num -= 3 * Z_BATCH; in crc32_z()
630 last2 = last << 1; in crc32_z()
642 num -= 3 * last; in crc32_z()
658 len--; in crc32_z()
663 /* Return the CRC, post-conditioned. */ in crc32_z()
673 least-significant byte of the word as the first byte of data, without any pre
679 data = (data >> 8) ^ crc_table[data & 0xff]; in crc_word()
686 data = (data << 8) ^ in crc_word_big()
687 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; in crc_word_big()
703 /* Pre-condition the CRC */ in crc32_z()
709 if (len >= N * W + W - 1) { in crc32_z()
716 while (len && ((z_size_t)buf & (W - 1)) != 0) { in crc32_z()
717 len--; in crc32_z()
718 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
721 /* Compute the CRC on as many N z_word_t blocks as are available. */ in crc32_z()
722 blks = len / (N * W); in crc32_z()
723 len -= blks * N * W; in crc32_z()
730 endian = 1; in crc32_z()
736 #if N > 1 in crc32_z()
739 #if N > 2 in crc32_z()
742 #if N > 3 in crc32_z()
745 #if N > 4 in crc32_z()
748 #if N > 5 in crc32_z()
759 #if N > 1 in crc32_z()
761 #if N > 2 in crc32_z()
763 #if N > 3 in crc32_z()
765 #if N > 4 in crc32_z()
767 #if N > 5 in crc32_z()
776 Process the first blks-1 blocks, computing the CRCs on each braid in crc32_z()
779 while (--blks) { in crc32_z()
782 #if N > 1 in crc32_z()
783 word1 = crc1 ^ words[1]; in crc32_z()
784 #if N > 2 in crc32_z()
786 #if N > 3 in crc32_z()
788 #if N > 4 in crc32_z()
790 #if N > 5 in crc32_z()
797 words += N; in crc32_z()
802 #if N > 1 in crc32_z()
804 #if N > 2 in crc32_z()
806 #if N > 3 in crc32_z()
808 #if N > 4 in crc32_z()
810 #if N > 5 in crc32_z()
817 for (k = 1; k < W; k++) { in crc32_z()
819 #if N > 1 in crc32_z()
821 #if N > 2 in crc32_z()
823 #if N > 3 in crc32_z()
825 #if N > 4 in crc32_z()
827 #if N > 5 in crc32_z()
838 Process the last block, combining the CRCs of the N braids at the in crc32_z()
842 #if N > 1 in crc32_z()
843 crc = crc_word(crc1 ^ words[1] ^ crc); in crc32_z()
844 #if N > 2 in crc32_z()
846 #if N > 3 in crc32_z()
848 #if N > 4 in crc32_z()
850 #if N > 5 in crc32_z()
857 words += N; in crc32_z()
863 #if N > 1 in crc32_z()
865 #if N > 2 in crc32_z()
867 #if N > 3 in crc32_z()
869 #if N > 4 in crc32_z()
871 #if N > 5 in crc32_z()
881 #if N > 1 in crc32_z()
883 #if N > 2 in crc32_z()
885 #if N > 3 in crc32_z()
887 #if N > 4 in crc32_z()
889 #if N > 5 in crc32_z()
898 Process the first blks-1 blocks, computing the CRCs on each braid in crc32_z()
901 while (--blks) { in crc32_z()
904 #if N > 1 in crc32_z()
905 word1 = crc1 ^ words[1]; in crc32_z()
906 #if N > 2 in crc32_z()
908 #if N > 3 in crc32_z()
910 #if N > 4 in crc32_z()
912 #if N > 5 in crc32_z()
919 words += N; in crc32_z()
924 #if N > 1 in crc32_z()
926 #if N > 2 in crc32_z()
928 #if N > 3 in crc32_z()
930 #if N > 4 in crc32_z()
932 #if N > 5 in crc32_z()
939 for (k = 1; k < W; k++) { in crc32_z()
941 #if N > 1 in crc32_z()
943 #if N > 2 in crc32_z()
945 #if N > 3 in crc32_z()
947 #if N > 4 in crc32_z()
949 #if N > 5 in crc32_z()
960 Process the last block, combining the CRCs of the N braids at the in crc32_z()
964 #if N > 1 in crc32_z()
965 comb = crc_word_big(crc1 ^ words[1] ^ comb); in crc32_z()
966 #if N > 2 in crc32_z()
968 #if N > 3 in crc32_z()
970 #if N > 4 in crc32_z()
972 #if N > 5 in crc32_z()
979 words += N; in crc32_z()
992 while (len >= 8) { in crc32_z()
993 len -= 8; in crc32_z()
994 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
995 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
1000 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
1001 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
1004 len--; in crc32_z()
1005 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; in crc32_z()
1008 /* Return the CRC, post-conditioned. */ in crc32_z()