Lines Matching +full:32 +full:- +full:bits
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
10 * The first step is to reduce it to 1024 bits. We do this in 8 parallel
12 * have more than 32 kB of data to checksum we repeat this step multiple
13 * times, passing in the previous 1024 bits.
15 * The next step is to reduce the 1024 bits to 64 bits. This step adds
16 * 32 bits of 0s to the end - this matches what a CRC does. We just
17 * calculate constants that land the data in this 32 bits.
20 * for n = CRC using POWER8 instructions. We use x = 32.
28 #include <asm/ppc-opcode.h>
66 std r31,-8(r1)
67 std r30,-16(r1)
68 std r29,-24(r1)
69 std r28,-32(r1)
70 std r27,-40(r1)
71 std r26,-48(r1)
72 std r25,-56(r1)
75 li off32,32
101 vspltisw v0,-1
110 vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
112 vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
141 * data - 128 / 16 = 8
148 addi r7,r7,-1
156 /* zero v0-v7 which will contain our checksums */
170 * already in v16-v23.
248 * to complete - first iteration load, second iteration vpmsum, third
358 * vpmsumd produces a 96 bit result in the least significant bits
360 * left 32 bits so it occupies the least significant bits in the
373 /* xor with last 1024 bits */
419 * Reduce the previously calculated 1024 bits to 64 bits, shifting
420 * 32 bits to include the trailing 32 bits of zeros
441 /* Now reduce the tail (0 - 112 bytes) */
525 * doing the computation 2x bits higher (ie 64 bits) and shifting the
526 * result back down 2x bits, we round down to the nearest multiple.
531 vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
538 vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
544 * our vector registers goes from 0-63 instead of 63-0. We can reflect
547 vand v1,v0,mask_32bit /* bottom 32 bits of a */
549 vand v1,v1,mask_32bit /* bottom 32 bits of ma */
551 vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
554 * Since we are bit reflected, the result (ie the low 32 bits) is in
555 * the high 32 bits. We just need to shift it left 4 bytes
559 vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of v0 */
580 ld r31,-8(r1)
581 ld r30,-16(r1)
582 ld r29,-24(r1)
583 ld r28,-32(r1)
584 ld r27,-40(r1)
585 ld r26,-48(r1)
586 ld r25,-56(r1)