Lines Matching +full:16 +full:g

68 	movdqu	\offset+16(buf), %xmm12
95 # Assumes len >= 16.
107 movdqu 16*0(buf), %xmm0
108 movdqu 16*1(buf), %xmm1
109 movdqu 16*2(buf), %xmm2
110 movdqu 16*3(buf), %xmm3
111 movdqu 16*4(buf), %xmm4
112 movdqu 16*5(buf), %xmm5
113 movdqu 16*6(buf), %xmm6
114 movdqu 16*7(buf), %xmm7
125 # XOR the first 16 data *bits* with the initial CRC value.
147 # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
159 # Fold across 16 bytes.
165 # Then subtract 16 to simplify the termination condition of the
167 add $128-16, len
169 # While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
180 add $16, buf
181 sub $16, len
185 # Add 16 to get the correct number of data bytes remaining in 0...15
186 # (not counting xmm7), following the previous extra subtraction by 16.
187 add $16, len
191 # Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
194 # the bytes into a first chunk of 'len' bytes and a second chunk of 16
199 # xmm1 = last 16 original data bytes
200 movdqu -16(buf, len), %xmm1
204 lea .Lbyteshift_table+16(%rip), %rax
209 # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
214 # then '16-len' bytes from xmm2 (high-order bytes).
225 # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
227 # Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
234 pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
243 pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
246 # Load G(x) and floor(x^48 / G(x)).
251 pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
253 pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x)
255 pxor %xmm7, %xmm0 # + low 16 nonzero bits
256 # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
261 .align 16
263 # Checksumming a buffer of length 16...255 bytes
265 # Load the first 16 data bytes.
268 add $16, buf
270 # XOR the first 16 data *bits* with the initial CRC value.
276 cmp $16, len
277 je .Lreduce_final_16_bytes # len == 16
280 add $16, len
285 .align 16
288 # G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
290 .quad 0x0000000000006123 # x^(8*128) mod G(x)
291 .quad 0x0000000000002295 # x^(8*128+64) mod G(x)
293 .quad 0x0000000000001069 # x^(4*128) mod G(x)
294 .quad 0x000000000000dd31 # x^(4*128+64) mod G(x)
296 .quad 0x000000000000857d # x^(2*128) mod G(x)
297 .quad 0x0000000000007acc # x^(2*128+64) mod G(x)
299 .quad 0x000000000000a010 # x^(1*128) mod G(x)
300 .quad 0x0000000000001faa # x^(1*128+64) mod G(x)
302 .quad 0x1368000000000000 # x^48 * (x^48 mod G(x))
303 .quad 0x2d56000000000000 # x^48 * (x^80 mod G(x))
305 .quad 0x0000000000018bb7 # G(x)
306 .quad 0x00000001f65a57f8 # floor(x^48 / G(x))
308 .section .rodata.cst16.mask1, "aM", @progbits, 16
309 .align 16
313 .section .rodata.cst16.mask2, "aM", @progbits, 16
314 .align 16
318 .section .rodata.cst16.bswap_mask, "aM", @progbits, 16
319 .align 16
324 .align 16
325 # For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
327 # 0x80} XOR the index vector to shift right by '16 - len' bytes.