/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aes-gcm-avx512.pl. */
.globl	ossl_vaes_vpclmulqdq_capable
.type	ossl_vaes_vpclmulqdq_capable,@function
.align	32
ossl_vaes_vpclmulqdq_capable:
	movq	OPENSSL_ia32cap_P+8(%rip),%rcx
	movq	$6600291188736,%rdx
	xorl	%eax,%eax
	andq	%rdx,%rcx
	cmpq	%rdx,%rcx
	cmoveq	%rcx,%rax
	.byte	0xf3,0xc3
.size	ossl_vaes_vpclmulqdq_capable, .-ossl_vaes_vpclmulqdq_capable
.text
.globl	ossl_aes_gcm_init_avx512
.type	ossl_aes_gcm_init_avx512,@function
.align	32
ossl_aes_gcm_init_avx512:
.cfi_startproc
.byte	243,15,30,250
	vpxorq	%xmm16,%xmm16,%xmm16
	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_128_duiuljAybFADyhe
	cmpl	$11,%eax
	je	.Laes_192_duiuljAybFADyhe
	cmpl	$13,%eax
	je	.Laes_256_duiuljAybFADyhe
	jmp	.Lexit_aes_duiuljAybFADyhe
.align 32
.Laes_128_duiuljAybFADyhe:
	vpxorq	0(%rdi),%xmm16,%xmm16
	vaesenc	16(%rdi),%xmm16,%xmm16
	vaesenc	32(%rdi),%xmm16,%xmm16
	vaesenc	48(%rdi),%xmm16,%xmm16
	vaesenc	64(%rdi),%xmm16,%xmm16
	vaesenc	80(%rdi),%xmm16,%xmm16
	vaesenc	96(%rdi),%xmm16,%xmm16
	vaesenc	112(%rdi),%xmm16,%xmm16
	vaesenc	128(%rdi),%xmm16,%xmm16
	vaesenc	144(%rdi),%xmm16,%xmm16
	vaesenclast	160(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_duiuljAybFADyhe
.align 32
.Laes_192_duiuljAybFADyhe:
	vpxorq	0(%rdi),%xmm16,%xmm16
	vaesenc	16(%rdi),%xmm16,%xmm16
	vaesenc	32(%rdi),%xmm16,%xmm16
	vaesenc	48(%rdi),%xmm16,%xmm16
	vaesenc	64(%rdi),%xmm16,%xmm16
	vaesenc	80(%rdi),%xmm16,%xmm16
	vaesenc	96(%rdi),%xmm16,%xmm16
	vaesenc	112(%rdi),%xmm16,%xmm16
	vaesenc	128(%rdi),%xmm16,%xmm16
	vaesenc	144(%rdi),%xmm16,%xmm16
	vaesenc	160(%rdi),%xmm16,%xmm16
	vaesenc	176(%rdi),%xmm16,%xmm16
	vaesenclast	192(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_duiuljAybFADyhe
.align 32
.Laes_256_duiuljAybFADyhe:
	vpxorq	0(%rdi),%xmm16,%xmm16
	vaesenc	16(%rdi),%xmm16,%xmm16
	vaesenc	32(%rdi),%xmm16,%xmm16
	vaesenc	48(%rdi),%xmm16,%xmm16
	vaesenc	64(%rdi),%xmm16,%xmm16
	vaesenc	80(%rdi),%xmm16,%xmm16
	vaesenc	96(%rdi),%xmm16,%xmm16
	vaesenc	112(%rdi),%xmm16,%xmm16
	vaesenc	128(%rdi),%xmm16,%xmm16
	vaesenc	144(%rdi),%xmm16,%xmm16
	vaesenc	160(%rdi),%xmm16,%xmm16
	vaesenc	176(%rdi),%xmm16,%xmm16
	vaesenc	192(%rdi),%xmm16,%xmm16
	vaesenc	208(%rdi),%xmm16,%xmm16
	vaesenclast	224(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_duiuljAybFADyhe
.Lexit_aes_duiuljAybFADyhe:
	vpshufb	SHUF_MASK(%rip),%xmm16,%xmm16
	vmovdqa64	%xmm16,%xmm2
	vpsllq	$1,%xmm16,%xmm16
	vpsrlq	$63,%xmm2,%xmm2
	vmovdqa	%xmm2,%xmm1
	vpslldq	$8,%xmm2,%xmm2
	vpsrldq	$8,%xmm1,%xmm1
	vporq	%xmm2,%xmm16,%xmm16
	vpshufd	$36,%xmm1,%xmm2
	vpcmpeqd	TWOONE(%rip),%xmm2,%xmm2
	vpand	POLY(%rip),%xmm2,%xmm2
	vpxorq	%xmm2,%xmm16,%xmm16
	vmovdqu64	%xmm16,336(%rsi)
	vshufi32x4	$0x00,%ymm16,%ymm16,%ymm4
	vmovdqa	%ymm4,%ymm3
	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm0
	vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm1
	vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm2
	vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm3,%ymm3
	vpsrldq	$8,%ymm3,%ymm2
	vpslldq	$8,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm3,%ymm3
	vmovdqu64	POLY2(%rip),%ymm2
	vpclmulqdq	$0x01,%ymm3,%ymm2,%ymm1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm3,%ymm3
	vpclmulqdq	$0x00,%ymm3,%ymm2,%ymm1
	vpsrldq	$4,%ymm1,%ymm1
	vpclmulqdq	$0x10,%ymm3,%ymm2,%ymm3
	vpslldq	$4,%ymm3,%ymm3
	vpternlogq	$0x96,%ymm1,%ymm0,%ymm3
	vmovdqu64	%xmm3,320(%rsi)
	vinserti64x2	$1,%xmm16,%ymm3,%ymm4
	vmovdqa64	%ymm4,%ymm5
	vpclmulqdq	$0x11,%ymm3,%ymm4,%ymm0
	vpclmulqdq	$0x00,%ymm3,%ymm4,%ymm1
	vpclmulqdq	$0x01,%ymm3,%ymm4,%ymm2
	vpclmulqdq	$0x10,%ymm3,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm4,%ymm4
	vpsrldq	$8,%ymm4,%ymm2
	vpslldq	$8,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm4,%ymm4
	vmovdqu64	POLY2(%rip),%ymm2
	vpclmulqdq
$0x01,%ymm4,%ymm2,%ymm1 vpslldq $8,%ymm1,%ymm1 vpxorq %ymm1,%ymm4,%ymm4 vpclmulqdq $0x00,%ymm4,%ymm2,%ymm1 vpsrldq $4,%ymm1,%ymm1 vpclmulqdq $0x10,%ymm4,%ymm2,%ymm4 vpslldq $4,%ymm4,%ymm4 vpternlogq $0x96,%ymm1,%ymm0,%ymm4 vmovdqu64 %ymm4,288(%rsi) vinserti64x4 $1,%ymm5,%zmm4,%zmm4 vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 vmovdqa64 %zmm4,%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm2,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm2 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm1,%zmm0,%zmm4 vmovdqu64 %zmm4,224(%rsi) vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 vpclmulqdq $0x11,%zmm3,%zmm5,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm2,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm2 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm5,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm5,%zmm2,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm1,%zmm0,%zmm5 vmovdqu64 %zmm5,160(%rsi) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm2,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm2 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm1,%zmm0,%zmm4 vmovdqu64 %zmm4,96(%rsi) vzeroupper .Labort_init: .byte 0xf3,0xc3 .cfi_endproc .size ossl_aes_gcm_init_avx512, .-ossl_aes_gcm_init_avx512 .globl ossl_aes_gcm_setiv_avx512 .type ossl_aes_gcm_setiv_avx512,@function .align 32 ossl_aes_gcm_setiv_avx512: .cfi_startproc .Lsetiv_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lsetiv_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lsetiv_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lsetiv_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lsetiv_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lsetiv_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lsetiv_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lsetiv_seh_setfp: .Lsetiv_seh_prolog_end: subq $820,%rsp andq $(-64),%rsp cmpq $12,%rcx je iv_len_12_init_IV vpxor %xmm2,%xmm2,%xmm2 movq %rdx,%r10 movq %rcx,%r11 orq %r11,%r11 jz .L_CALC_AAD_done_mBgdvxqgFGebeug xorq %rbx,%rbx vmovdqa64 SHUF_MASK(%rip),%zmm16 .L_get_AAD_loop48x16_mBgdvxqgFGebeug: cmpq $768,%r11 jl .L_exit_AAD_loop48x16_mBgdvxqgFGebeug vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_EzsAegbBbaerfwt vmovdqu64 288(%rsi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rsi),%zmm9 vmovdqu64 
%zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rsi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rsi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,192(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq 
$0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,128(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,64(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,0(%rsp) .L_skip_hkeys_precomputation_EzsAegbBbaerfwt: movq $1,%rbx vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 0(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 64(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 128(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 192(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq 
$0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 512(%r10),%zmm11 vmovdqu64 576(%r10),%zmm3 vmovdqu64 640(%r10),%zmm4 vmovdqu64 704(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $768,%r11 je .L_CALC_AAD_done_mBgdvxqgFGebeug addq $768,%r10 jmp .L_get_AAD_loop48x16_mBgdvxqgFGebeug .L_exit_AAD_loop48x16_mBgdvxqgFGebeug: cmpq $512,%r11 jl .L_less_than_32x16_mBgdvxqgFGebeug vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_xCxmdbgxoCdwefc vmovdqu64 288(%rsi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rsi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rsi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rsi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq 
%zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) .L_skip_hkeys_precomputation_xCxmdbgxoCdwefc: movq $1,%rbx vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq 
$0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $512,%r11 je .L_CALC_AAD_done_mBgdvxqgFGebeug addq $512,%r10 jmp .L_less_than_16x16_mBgdvxqgFGebeug .L_less_than_32x16_mBgdvxqgFGebeug: cmpq $256,%r11 jl .L_less_than_16x16_mBgdvxqgFGebeug vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 96(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 160(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 224(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 288(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $256,%r11 je .L_CALC_AAD_done_mBgdvxqgFGebeug addq $256,%r10 .L_less_than_16x16_mBgdvxqgFGebeug: leaq byte64_len_to_mask_table(%rip),%r12 leaq (%r12,%r11,8),%r12 addl $15,%r11d shrl $4,%r11d cmpl $2,%r11d jb .L_AAD_blocks_1_mBgdvxqgFGebeug je .L_AAD_blocks_2_mBgdvxqgFGebeug cmpl $4,%r11d jb .L_AAD_blocks_3_mBgdvxqgFGebeug je .L_AAD_blocks_4_mBgdvxqgFGebeug cmpl $6,%r11d jb .L_AAD_blocks_5_mBgdvxqgFGebeug je .L_AAD_blocks_6_mBgdvxqgFGebeug cmpl $8,%r11d jb .L_AAD_blocks_7_mBgdvxqgFGebeug je .L_AAD_blocks_8_mBgdvxqgFGebeug cmpl $10,%r11d jb .L_AAD_blocks_9_mBgdvxqgFGebeug je .L_AAD_blocks_10_mBgdvxqgFGebeug cmpl $12,%r11d jb .L_AAD_blocks_11_mBgdvxqgFGebeug je .L_AAD_blocks_12_mBgdvxqgFGebeug cmpl $14,%r11d jb .L_AAD_blocks_13_mBgdvxqgFGebeug je .L_AAD_blocks_14_mBgdvxqgFGebeug cmpl $15,%r11d je .L_AAD_blocks_15_mBgdvxqgFGebeug .L_AAD_blocks_16_mBgdvxqgFGebeug: subq $1536,%r12 kmovq 
(%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 96(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 160(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm9,%zmm11,%zmm1 vpternlogq $0x96,%zmm10,%zmm3,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm12,%zmm11,%zmm7 vpternlogq $0x96,%zmm13,%zmm3,%zmm8 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_15_mBgdvxqgFGebeug: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 112(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 176(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq 
$0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_14_mBgdvxqgFGebeug: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%ymm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %ymm16,%ymm5,%ymm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 128(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 192(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_13_mBgdvxqgFGebeug: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%xmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %xmm16,%xmm5,%xmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 144(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 208(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 
vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_12_mBgdvxqgFGebeug: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 160(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_11_mBgdvxqgFGebeug: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 176(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq 
$8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_10_mBgdvxqgFGebeug: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%ymm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %ymm16,%ymm4,%ymm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 192(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_9_mBgdvxqgFGebeug: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%xmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %xmm16,%xmm4,%xmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 208(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_8_mBgdvxqgFGebeug: subq $512,%r12 
kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_7_mBgdvxqgFGebeug: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_6_mBgdvxqgFGebeug: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%ymm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %ymm16,%ymm3,%ymm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm3,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq 
%ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_5_mBgdvxqgFGebeug: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%xmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %xmm16,%xmm3,%xmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_4_mBgdvxqgFGebeug: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_3_mBgdvxqgFGebeug: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq 
$0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_2_mBgdvxqgFGebeug: kmovq (%r12),%k1 vmovdqu8 0(%r10),%ymm11{%k1}{z} vpshufb %ymm16,%ymm11,%ymm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm11,%ymm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_mBgdvxqgFGebeug .L_AAD_blocks_1_mBgdvxqgFGebeug: kmovq (%r12),%k1 vmovdqu8 0(%r10),%xmm11{%k1}{z} vpshufb %xmm16,%xmm11,%xmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 .L_CALC_AAD_done_mBgdvxqgFGebeug: movq %rcx,%r10 shlq $3,%r10 vmovq %r10,%xmm3 vpxorq %xmm2,%xmm3,%xmm2 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x11,%xmm1,%xmm2,%xmm11 vpclmulqdq $0x00,%xmm1,%xmm2,%xmm3 vpclmulqdq $0x01,%xmm1,%xmm2,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm2,%xmm2 vpxorq %xmm4,%xmm2,%xmm2 vpsrldq $8,%xmm2,%xmm4 vpslldq $8,%xmm2,%xmm2 vpxorq %xmm4,%xmm11,%xmm11 vpxorq %xmm3,%xmm2,%xmm2 vmovdqu64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm2,%xmm4,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm2,%xmm2 vpclmulqdq $0x00,%xmm2,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm2,%xmm4,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm3,%xmm11,%xmm2 vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 jmp skip_iv_len_12_init_IV iv_len_12_init_IV: vmovdqu8 ONEf(%rip),%xmm2 movq %rdx,%r11 movl $0x0000000000000fff,%r10d kmovq %r10,%k1 vmovdqu8 (%r11),%xmm2{%k1} skip_iv_len_12_init_IV: vmovdqu %xmm2,%xmm1 movl 240(%rdi),%r10d cmpl $9,%r10d je .Laes_128_wbuuzwjyGbjeaox cmpl $11,%r10d je .Laes_192_wbuuzwjyGbjeaox cmpl $13,%r10d je .Laes_256_wbuuzwjyGbjeaox jmp .Lexit_aes_wbuuzwjyGbjeaox .align 32 .Laes_128_wbuuzwjyGbjeaox: vpxorq 0(%rdi),%xmm1,%xmm1 vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenclast 160(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_wbuuzwjyGbjeaox .align 32 .Laes_192_wbuuzwjyGbjeaox: vpxorq 0(%rdi),%xmm1,%xmm1 
vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenc 160(%rdi),%xmm1,%xmm1 vaesenc 176(%rdi),%xmm1,%xmm1 vaesenclast 192(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_wbuuzwjyGbjeaox .align 32 .Laes_256_wbuuzwjyGbjeaox: vpxorq 0(%rdi),%xmm1,%xmm1 vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenc 160(%rdi),%xmm1,%xmm1 vaesenc 176(%rdi),%xmm1,%xmm1 vaesenc 192(%rdi),%xmm1,%xmm1 vaesenc 208(%rdi),%xmm1,%xmm1 vaesenclast 224(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_wbuuzwjyGbjeaox .Lexit_aes_wbuuzwjyGbjeaox: vmovdqu %xmm1,32(%rsi) vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 vmovdqu %xmm2,0(%rsi) cmpq $256,%rcx jbe .Lskip_hkeys_cleanup_pseltoyDnFwppqb vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_pseltoyDnFwppqb: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .Labort_setiv: .byte 0xf3,0xc3 .Lsetiv_seh_end: .cfi_endproc .size ossl_aes_gcm_setiv_avx512, .-ossl_aes_gcm_setiv_avx512 .globl ossl_aes_gcm_update_aad_avx512 .type ossl_aes_gcm_update_aad_avx512,@function .align 32 ossl_aes_gcm_update_aad_avx512: .cfi_startproc .Lghash_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lghash_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lghash_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lghash_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lghash_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lghash_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lghash_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lghash_seh_setfp: .Lghash_seh_prolog_end: subq $820,%rsp andq $(-64),%rsp vmovdqu64 64(%rdi),%xmm14 movq %rsi,%r10 movq %rdx,%r11 orq %r11,%r11 jz .L_CALC_AAD_done_ijFECAxDcrvrgja xorq %rbx,%rbx vmovdqa64 SHUF_MASK(%rip),%zmm16 .L_get_AAD_loop48x16_ijFECAxDcrvrgja: cmpq $768,%r11 jl .L_exit_AAD_loop48x16_ijFECAxDcrvrgja vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_AfEjmfnrFdFcycC vmovdqu64 288(%rdi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rdi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rdi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rdi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq 
$0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,192(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,128(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 
vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,64(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,0(%rsp) .L_skip_hkeys_precomputation_AfEjmfnrFdFcycC: movq $1,%rbx vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 0(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 64(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 128(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 192(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 512(%r10),%zmm11 vmovdqu64 576(%r10),%zmm3 vmovdqu64 640(%r10),%zmm4 vmovdqu64 704(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb 
%zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $768,%r11 je .L_CALC_AAD_done_ijFECAxDcrvrgja addq $768,%r10 jmp .L_get_AAD_loop48x16_ijFECAxDcrvrgja .L_exit_AAD_loop48x16_ijFECAxDcrvrgja: cmpq $512,%r11 jl .L_less_than_32x16_ijFECAxDcrvrgja vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_kvsjACAeAekBEdd vmovdqu64 288(%rdi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rdi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rdi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rdi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) 
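/*
 * Higher powers of the GHASH key are derived once per call and cached in
 * the stack frame (below 768(%rsp)); %rbx is set to 1 afterwards so the
 * precomputation is skipped on later 32/48-block passes.
 */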
vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) .L_skip_hkeys_precomputation_kvsjACAeAekBEdd: movq $1,%rbx vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 
$1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $512,%r11 je .L_CALC_AAD_done_ijFECAxDcrvrgja addq $512,%r10 jmp .L_less_than_16x16_ijFECAxDcrvrgja .L_less_than_32x16_ijFECAxDcrvrgja: cmpq $256,%r11 jl .L_less_than_16x16_ijFECAxDcrvrgja vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 96(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 160(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 224(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 288(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $256,%r11 je .L_CALC_AAD_done_ijFECAxDcrvrgja addq $256,%r10 .L_less_than_16x16_ijFECAxDcrvrgja: leaq byte64_len_to_mask_table(%rip),%r12 leaq (%r12,%r11,8),%r12 addl $15,%r11d shrl $4,%r11d cmpl $2,%r11d jb .L_AAD_blocks_1_ijFECAxDcrvrgja je .L_AAD_blocks_2_ijFECAxDcrvrgja cmpl $4,%r11d jb .L_AAD_blocks_3_ijFECAxDcrvrgja je .L_AAD_blocks_4_ijFECAxDcrvrgja cmpl $6,%r11d jb .L_AAD_blocks_5_ijFECAxDcrvrgja je .L_AAD_blocks_6_ijFECAxDcrvrgja cmpl $8,%r11d jb .L_AAD_blocks_7_ijFECAxDcrvrgja je .L_AAD_blocks_8_ijFECAxDcrvrgja cmpl $10,%r11d jb .L_AAD_blocks_9_ijFECAxDcrvrgja je .L_AAD_blocks_10_ijFECAxDcrvrgja cmpl $12,%r11d jb .L_AAD_blocks_11_ijFECAxDcrvrgja je .L_AAD_blocks_12_ijFECAxDcrvrgja cmpl $14,%r11d jb .L_AAD_blocks_13_ijFECAxDcrvrgja je .L_AAD_blocks_14_ijFECAxDcrvrgja cmpl $15,%r11d je .L_AAD_blocks_15_ijFECAxDcrvrgja .L_AAD_blocks_16_ijFECAxDcrvrgja: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 
vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 96(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 160(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm9,%zmm11,%zmm1 vpternlogq $0x96,%zmm10,%zmm3,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm12,%zmm11,%zmm7 vpternlogq $0x96,%zmm13,%zmm3,%zmm8 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_15_ijFECAxDcrvrgja: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 112(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 176(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_14_ijFECAxDcrvrgja: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%ymm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %ymm16,%ymm5,%ymm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 128(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 192(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_13_ijFECAxDcrvrgja: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%xmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %xmm16,%xmm5,%xmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 144(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 208(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq 
%xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_12_ijFECAxDcrvrgja: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 160(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_11_ijFECAxDcrvrgja: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 176(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_10_ijFECAxDcrvrgja: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%ymm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %ymm16,%ymm4,%ymm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 192(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_9_ijFECAxDcrvrgja: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%xmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %xmm16,%xmm4,%xmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 208(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_8_ijFECAxDcrvrgja: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm14,%zmm11,%zmm11 
vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_7_ijFECAxDcrvrgja: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_6_ijFECAxDcrvrgja: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%ymm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %ymm16,%ymm3,%ymm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm3,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq 
$8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_5_ijFECAxDcrvrgja: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%xmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %xmm16,%xmm3,%xmm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_4_ijFECAxDcrvrgja: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_3_ijFECAxDcrvrgja: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja 
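/*
 * Each .L_AAD_blocks_N tail case loads the last N 16-byte blocks under a
 * byte64_len_to_mask_table mask, byte-reflects them with the %zmm16
 * shuffle mask, multiplies them by the matching key powers with
 * vpclmulqdq, reduces modulo the GHASH polynomial (POLY2), and leaves the
 * updated hash in %xmm14 for .L_CALC_AAD_done.
 */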
.L_AAD_blocks_2_ijFECAxDcrvrgja: kmovq (%r12),%k1 vmovdqu8 0(%r10),%ymm11{%k1}{z} vpshufb %ymm16,%ymm11,%ymm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm11,%ymm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_ijFECAxDcrvrgja .L_AAD_blocks_1_ijFECAxDcrvrgja: kmovq (%r12),%k1 vmovdqu8 0(%r10),%xmm11{%k1}{z} vpshufb %xmm16,%xmm11,%xmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 .L_CALC_AAD_done_ijFECAxDcrvrgja: vmovdqu64 %xmm14,64(%rdi) cmpq $256,%rdx jbe .Lskip_hkeys_cleanup_qbvewaDGpzpiiAA vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_qbvewaDGpzpiiAA: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .Lexit_update_aad: .byte 0xf3,0xc3 .Lghash_seh_end: .cfi_endproc .size ossl_aes_gcm_update_aad_avx512, .-ossl_aes_gcm_update_aad_avx512 .globl ossl_aes_gcm_encrypt_avx512 .type ossl_aes_gcm_encrypt_avx512,@function .align 32 ossl_aes_gcm_encrypt_avx512: .cfi_startproc .Lencrypt_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lencrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lencrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lencrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lencrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lencrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 
.Lencrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lencrypt_seh_setfp: .Lencrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_encrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_encrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_encrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_done_pdDdEbGtmhbgzzj xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_pxhfCnBixjkllFd movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_pxhfCnBixjkllFd subq %r13,%r12 .L_no_extra_mask_pxhfCnBixjkllFd: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_pxhfCnBixjkllFd vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_pxhfCnBixjkllFd .L_partial_incomplete_pxhfCnBixjkllFd: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_pxhfCnBixjkllFd: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_pxhfCnBixjkllFd: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_pdDdEbGtmhbgzzj cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_pdDdEbGtmhbgzzj vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_mapiDClopxEitar vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_mapiDClopxEitar .L_next_16_overflow_mapiDClopxEitar: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_mapiDClopxEitar: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc 
%zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_wEgffnstFkkCiax vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_wEgffnstFkkCiax: cmpq $512,%r8 jb .L_message_below_32_blocks_pdDdEbGtmhbgzzj cmpb $240,%r15b jae .L_next_16_overflow_lzgFuCogmBcsocA vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_lzgFuCogmBcsocA .L_next_16_overflow_lzgFuCogmBcsocA: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_lzgFuCogmBcsocA: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 
vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_fxgusndxuFFGjih vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 
vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_fxgusndxuFFGjih: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_pdDdEbGtmhbgzzj .L_encrypt_big_nblocks_pdDdEbGtmhbgzzj: cmpb $240,%r15b jae .L_16_blocks_overflow_ibqhltvwwkyjEta vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ibqhltvwwkyjEta .L_16_blocks_overflow_ibqhltvwwkyjEta: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ibqhltvwwkyjEta: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_cEaavogFAbujiEy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_cEaavogFAbujiEy .L_16_blocks_overflow_cEaavogFAbujiEy: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_cEaavogFAbujiEy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 
$255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_usjsvymwkviypdp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_usjsvymwkviypdp .L_16_blocks_overflow_usjsvymwkviypdp: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd 
%zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_usjsvymwkviypdp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast 
%zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_pdDdEbGtmhbgzzj .L_no_more_big_nblocks_pdDdEbGtmhbgzzj: cmpq $512,%r8 jae .L_encrypt_32_blocks_pdDdEbGtmhbgzzj cmpq $256,%r8 jae .L_encrypt_16_blocks_pdDdEbGtmhbgzzj .L_encrypt_0_blocks_ghash_32_pdDdEbGtmhbgzzj: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_ikhdrkemcGbqzad cmpl $8,%r10d je .L_last_num_blocks_is_8_ikhdrkemcGbqzad jb .L_last_num_blocks_is_7_1_ikhdrkemcGbqzad cmpl $12,%r10d je .L_last_num_blocks_is_12_ikhdrkemcGbqzad jb .L_last_num_blocks_is_11_9_ikhdrkemcGbqzad cmpl $15,%r10d je .L_last_num_blocks_is_15_ikhdrkemcGbqzad ja .L_last_num_blocks_is_16_ikhdrkemcGbqzad cmpl $14,%r10d je .L_last_num_blocks_is_14_ikhdrkemcGbqzad jmp .L_last_num_blocks_is_13_ikhdrkemcGbqzad .L_last_num_blocks_is_11_9_ikhdrkemcGbqzad: cmpl $10,%r10d je .L_last_num_blocks_is_10_ikhdrkemcGbqzad ja .L_last_num_blocks_is_11_ikhdrkemcGbqzad jmp .L_last_num_blocks_is_9_ikhdrkemcGbqzad .L_last_num_blocks_is_7_1_ikhdrkemcGbqzad: cmpl $4,%r10d je .L_last_num_blocks_is_4_ikhdrkemcGbqzad jb .L_last_num_blocks_is_3_1_ikhdrkemcGbqzad cmpl $6,%r10d ja .L_last_num_blocks_is_7_ikhdrkemcGbqzad je .L_last_num_blocks_is_6_ikhdrkemcGbqzad jmp .L_last_num_blocks_is_5_ikhdrkemcGbqzad .L_last_num_blocks_is_3_1_ikhdrkemcGbqzad: cmpl $2,%r10d ja .L_last_num_blocks_is_3_ikhdrkemcGbqzad je .L_last_num_blocks_is_2_ikhdrkemcGbqzad .L_last_num_blocks_is_1_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_itDorffzaCkryqj vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_itDorffzaCkryqj .L_16_blocks_overflow_itDorffzaCkryqj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 
vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_itDorffzaCkryqj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_wcppwgxpbwxBCxm subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wcppwgxpbwxBCxm .L_small_initial_partial_block_wcppwgxpbwxBCxm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq 
%xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_wcppwgxpbwxBCxm .L_small_initial_compute_done_wcppwgxpbwxBCxm: .L_after_reduction_wcppwgxpbwxBCxm: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_2_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_udFwtdnCnceudlw vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_udFwtdnCnceudlw .L_16_blocks_overflow_udFwtdnCnceudlw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_udFwtdnCnceudlw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pBaBAiGArbidqBv subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pBaBAiGArbidqBv .L_small_initial_partial_block_pBaBAiGArbidqBv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pBaBAiGArbidqBv: orq %r8,%r8 je .L_after_reduction_pBaBAiGArbidqBv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pBaBAiGArbidqBv: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_3_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_mnDuevixjjefvof vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_mnDuevixjjefvof .L_16_blocks_overflow_mnDuevixjjefvof: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_mnDuevixjjefvof: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yatvknGgscybvGg subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yatvknGgscybvGg .L_small_initial_partial_block_yatvknGgscybvGg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yatvknGgscybvGg: orq %r8,%r8 je .L_after_reduction_yatvknGgscybvGg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yatvknGgscybvGg: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_4_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_vsajDEszBaAzgFt vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_vsajDEszBaAzgFt .L_16_blocks_overflow_vsajDEszBaAzgFt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_vsajDEszBaAzgFt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tchAiplfgmzAeEo subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tchAiplfgmzAeEo .L_small_initial_partial_block_tchAiplfgmzAeEo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tchAiplfgmzAeEo: orq %r8,%r8 je .L_after_reduction_tchAiplfgmzAeEo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tchAiplfgmzAeEo: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_5_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_cxtFqdnzBjmtkGn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_cxtFqdnzBjmtkGn .L_16_blocks_overflow_cxtFqdnzBjmtkGn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_cxtFqdnzBjmtkGn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EdeEenqDBtzbplp subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EdeEenqDBtzbplp .L_small_initial_partial_block_EdeEenqDBtzbplp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EdeEenqDBtzbplp: orq %r8,%r8 je .L_after_reduction_EdeEenqDBtzbplp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EdeEenqDBtzbplp: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_6_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_jwkFAEiBkzxclcz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_jwkFAEiBkzxclcz .L_16_blocks_overflow_jwkFAEiBkzxclcz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_jwkFAEiBkzxclcz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lBhDyvvhkrxyrza subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lBhDyvvhkrxyrza .L_small_initial_partial_block_lBhDyvvhkrxyrza: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 
vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lBhDyvvhkrxyrza: orq %r8,%r8 je .L_after_reduction_lBhDyvvhkrxyrza vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lBhDyvvhkrxyrza: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_7_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_uGexndlCfdoqjpe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_uGexndlCfdoqjpe .L_16_blocks_overflow_uGexndlCfdoqjpe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_uGexndlCfdoqjpe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast 
%zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Bxunmhnvmncxhcy subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Bxunmhnvmncxhcy .L_small_initial_partial_block_Bxunmhnvmncxhcy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Bxunmhnvmncxhcy: orq %r8,%r8 je .L_after_reduction_Bxunmhnvmncxhcy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Bxunmhnvmncxhcy: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_8_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_vudwsyfxfgECgcf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_vudwsyfxfgECgcf .L_16_blocks_overflow_vudwsyfxfgECgcf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_vudwsyfxfgECgcf: vbroadcastf64x2 
0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rvqyhsdrhoanuka subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rvqyhsdrhoanuka .L_small_initial_partial_block_rvqyhsdrhoanuka: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rvqyhsdrhoanuka: orq %r8,%r8 je .L_after_reduction_rvqyhsdrhoanuka vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rvqyhsdrhoanuka: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_9_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_mrBoGdbnxnwlkxC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_mrBoGdbnxnwlkxC .L_16_blocks_overflow_mrBoGdbnxnwlkxC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_mrBoGdbnxnwlkxC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tuyribkvmwGnBux subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tuyribkvmwGnBux .L_small_initial_partial_block_tuyribkvmwGnBux: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tuyribkvmwGnBux: orq %r8,%r8 je .L_after_reduction_tuyribkvmwGnBux vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tuyribkvmwGnBux: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_10_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_lgaFjCbzqlskvnC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_lgaFjCbzqlskvnC .L_16_blocks_overflow_lgaFjCbzqlskvnC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_lgaFjCbzqlskvnC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_doFvvyygahavAuD subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_doFvvyygahavAuD .L_small_initial_partial_block_doFvvyygahavAuD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_doFvvyygahavAuD: orq %r8,%r8 je .L_after_reduction_doFvvyygahavAuD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_doFvvyygahavAuD: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_11_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_wnveeoCoFhnAsjr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_wnveeoCoFhnAsjr .L_16_blocks_overflow_wnveeoCoFhnAsjr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_wnveeoCoFhnAsjr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_okdqxckEysfDiGw subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_okdqxckEysfDiGw .L_small_initial_partial_block_okdqxckEysfDiGw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_okdqxckEysfDiGw: orq %r8,%r8 je .L_after_reduction_okdqxckEysfDiGw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_okdqxckEysfDiGw: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_12_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_aeCekhphkkfCGlp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_aeCekhphkkfCGlp .L_16_blocks_overflow_aeCekhphkkfCGlp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_aeCekhphkkfCGlp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tAjudiknsDunngB subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tAjudiknsDunngB .L_small_initial_partial_block_tAjudiknsDunngB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tAjudiknsDunngB: orq %r8,%r8 je .L_after_reduction_tAjudiknsDunngB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tAjudiknsDunngB: jmp 
.L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_13_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_vFhoejiyDCGCfdw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_vFhoejiyDCGCfdw .L_16_blocks_overflow_vFhoejiyDCGCfdw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_vFhoejiyDCGCfdw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 
vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_svrobwfwdbaDnCx subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_svrobwfwdbaDnCx .L_small_initial_partial_block_svrobwfwdbaDnCx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_svrobwfwdbaDnCx: orq %r8,%r8 je .L_after_reduction_svrobwfwdbaDnCx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_svrobwfwdbaDnCx: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_14_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_hgwwfomjsnxunhr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_hgwwfomjsnxunhr .L_16_blocks_overflow_hgwwfomjsnxunhr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_hgwwfomjsnxunhr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 
vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_seAkuxixhdBEdfz subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_seAkuxixhdBEdfz .L_small_initial_partial_block_seAkuxixhdBEdfz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 
vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_seAkuxixhdBEdfz: orq %r8,%r8 je .L_after_reduction_seAkuxixhdBEdfz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_seAkuxixhdBEdfz: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_15_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_wbagfdFdigxytjj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wbagfdFdigxytjj .L_16_blocks_overflow_wbagfdFdigxytjj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wbagfdFdigxytjj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ChmDFBmjkjBuetv subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ChmDFBmjkjBuetv .L_small_initial_partial_block_ChmDFBmjkjBuetv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ChmDFBmjkjBuetv: orq %r8,%r8 je .L_after_reduction_ChmDFBmjkjBuetv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ChmDFBmjkjBuetv: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_16_ikhdrkemcGbqzad: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_dkuzxAGzynhzFCe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_dkuzxAGzynhzFCe .L_16_blocks_overflow_dkuzxAGzynhzFCe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_dkuzxAGzynhzFCe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 
vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_vtbrvsizdbGzbGo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vtbrvsizdbGzbGo: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vtbrvsizdbGzbGo: jmp .L_last_blocks_done_ikhdrkemcGbqzad .L_last_num_blocks_is_0_ikhdrkemcGbqzad: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_ikhdrkemcGbqzad: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pdDdEbGtmhbgzzj .L_encrypt_32_blocks_pdDdEbGtmhbgzzj: cmpb $240,%r15b jae .L_16_blocks_overflow_DpBiAfvjdcateGm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_DpBiAfvjdcateGm .L_16_blocks_overflow_DpBiAfvjdcateGm: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_DpBiAfvjdcateGm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_pnochsioawayaBr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pnochsioawayaBr .L_16_blocks_overflow_pnochsioawayaBr: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_pnochsioawayaBr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 
vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq 
$0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_nqBvobwmcxocojb cmpl $8,%r10d je .L_last_num_blocks_is_8_nqBvobwmcxocojb jb .L_last_num_blocks_is_7_1_nqBvobwmcxocojb cmpl $12,%r10d je .L_last_num_blocks_is_12_nqBvobwmcxocojb jb .L_last_num_blocks_is_11_9_nqBvobwmcxocojb cmpl $15,%r10d je .L_last_num_blocks_is_15_nqBvobwmcxocojb ja .L_last_num_blocks_is_16_nqBvobwmcxocojb cmpl $14,%r10d je .L_last_num_blocks_is_14_nqBvobwmcxocojb jmp .L_last_num_blocks_is_13_nqBvobwmcxocojb .L_last_num_blocks_is_11_9_nqBvobwmcxocojb: cmpl $10,%r10d je .L_last_num_blocks_is_10_nqBvobwmcxocojb ja .L_last_num_blocks_is_11_nqBvobwmcxocojb jmp .L_last_num_blocks_is_9_nqBvobwmcxocojb .L_last_num_blocks_is_7_1_nqBvobwmcxocojb: cmpl $4,%r10d je .L_last_num_blocks_is_4_nqBvobwmcxocojb jb .L_last_num_blocks_is_3_1_nqBvobwmcxocojb cmpl $6,%r10d ja .L_last_num_blocks_is_7_nqBvobwmcxocojb je .L_last_num_blocks_is_6_nqBvobwmcxocojb jmp .L_last_num_blocks_is_5_nqBvobwmcxocojb .L_last_num_blocks_is_3_1_nqBvobwmcxocojb: cmpl $2,%r10d ja .L_last_num_blocks_is_3_nqBvobwmcxocojb je .L_last_num_blocks_is_2_nqBvobwmcxocojb .L_last_num_blocks_is_1_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_iGlCGEwegGzFhtA vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_iGlCGEwegGzFhtA .L_16_blocks_overflow_iGlCGEwegGzFhtA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_iGlCGEwegGzFhtA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_hFBzlBjpABAteEq subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hFBzlBjpABAteEq .L_small_initial_partial_block_hFBzlBjpABAteEq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_hFBzlBjpABAteEq .L_small_initial_compute_done_hFBzlBjpABAteEq: .L_after_reduction_hFBzlBjpABAteEq: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_2_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_BwDxojfsymCmEeo vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_BwDxojfsymCmEeo .L_16_blocks_overflow_BwDxojfsymCmEeo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_BwDxojfsymCmEeo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 
16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ujnyckFGoBmGvAD subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ujnyckFGoBmGvAD .L_small_initial_partial_block_ujnyckFGoBmGvAD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ujnyckFGoBmGvAD: orq %r8,%r8 je .L_after_reduction_ujnyckFGoBmGvAD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ujnyckFGoBmGvAD: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_3_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_ArGalqGfmEgtzdC vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_ArGalqGfmEgtzdC .L_16_blocks_overflow_ArGalqGfmEgtzdC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_ArGalqGfmEgtzdC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tlDwADlnmmFjwlt subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tlDwADlnmmFjwlt .L_small_initial_partial_block_tlDwADlnmmFjwlt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tlDwADlnmmFjwlt: orq %r8,%r8 je .L_after_reduction_tlDwADlnmmFjwlt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tlDwADlnmmFjwlt: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_4_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_eiFwyntDmEqyCDx vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_eiFwyntDmEqyCDx .L_16_blocks_overflow_eiFwyntDmEqyCDx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_eiFwyntDmEqyCDx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc 
%zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zAosBwqfDyjcdyb subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zAosBwqfDyjcdyb .L_small_initial_partial_block_zAosBwqfDyjcdyb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zAosBwqfDyjcdyb: orq %r8,%r8 je .L_after_reduction_zAosBwqfDyjcdyb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zAosBwqfDyjcdyb: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_5_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_bAoFucDcpblzDdt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_bAoFucDcpblzDdt .L_16_blocks_overflow_bAoFucDcpblzDdt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_bAoFucDcpblzDdt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_icuaypakFrCovoy subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_icuaypakFrCovoy .L_small_initial_partial_block_icuaypakFrCovoy: movq %r8,(%rdx) vmovdqu64 
%xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_icuaypakFrCovoy: orq %r8,%r8 je .L_after_reduction_icuaypakFrCovoy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_icuaypakFrCovoy: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_6_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_nBxnDvEEtcfmmpA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_nBxnDvEEtcfmmpA .L_16_blocks_overflow_nBxnDvEEtcfmmpA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_nBxnDvEEtcfmmpA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oBDgqvmqflGBdts subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oBDgqvmqflGBdts .L_small_initial_partial_block_oBDgqvmqflGBdts: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oBDgqvmqflGBdts: orq %r8,%r8 je .L_after_reduction_oBDgqvmqflGBdts vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oBDgqvmqflGBdts: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_7_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_ktiEwgDjzbqnlgA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ktiEwgDjzbqnlgA .L_16_blocks_overflow_ktiEwgDjzbqnlgA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ktiEwgDjzbqnlgA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rhqzwAqatoAowvt subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rhqzwAqatoAowvt .L_small_initial_partial_block_rhqzwAqatoAowvt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rhqzwAqatoAowvt: orq %r8,%r8 je .L_after_reduction_rhqzwAqatoAowvt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rhqzwAqatoAowvt: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_8_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_ppdpbjvaqFskcDy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ppdpbjvaqFskcDy .L_16_blocks_overflow_ppdpbjvaqFskcDy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ppdpbjvaqFskcDy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hghryxmwctxcEsx subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hghryxmwctxcEsx .L_small_initial_partial_block_hghryxmwctxcEsx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hghryxmwctxcEsx: orq %r8,%r8 je .L_after_reduction_hghryxmwctxcEsx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hghryxmwctxcEsx: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_9_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_ssqyutccxCiqEfp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_ssqyutccxCiqEfp .L_16_blocks_overflow_ssqyutccxCiqEfp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_ssqyutccxCiqEfp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast 
%xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dkgcmoCccqwinCj subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dkgcmoCccqwinCj .L_small_initial_partial_block_dkgcmoCccqwinCj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dkgcmoCccqwinCj: orq %r8,%r8 je .L_after_reduction_dkgcmoCccqwinCj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dkgcmoCccqwinCj: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_10_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_qrrfwGAzztwabql vpaddd 
%zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_qrrfwGAzztwabql .L_16_blocks_overflow_qrrfwGAzztwabql: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_qrrfwGAzztwabql: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ioCDffAzuDvuFmD subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ioCDffAzuDvuFmD .L_small_initial_partial_block_ioCDffAzuDvuFmD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ioCDffAzuDvuFmD: orq %r8,%r8 je .L_after_reduction_ioCDffAzuDvuFmD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ioCDffAzuDvuFmD: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_11_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_BFnbwbbsiwGDDCn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_BFnbwbbsiwGDDCn .L_16_blocks_overflow_BFnbwbbsiwGDDCn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 
.L_16_blocks_ok_BFnbwbbsiwGDDCn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cCoGeiFGozAwFew subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cCoGeiFGozAwFew .L_small_initial_partial_block_cCoGeiFGozAwFew: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cCoGeiFGozAwFew: orq %r8,%r8 je .L_after_reduction_cCoGeiFGozAwFew vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cCoGeiFGozAwFew: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_12_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_haBiqFbjgxpdzpn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_haBiqFbjgxpdzpn .L_16_blocks_overflow_haBiqFbjgxpdzpn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_haBiqFbjgxpdzpn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nhbrtEjyiFhswCq subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nhbrtEjyiFhswCq .L_small_initial_partial_block_nhbrtEjyiFhswCq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nhbrtEjyiFhswCq: orq %r8,%r8 je .L_after_reduction_nhbrtEjyiFhswCq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nhbrtEjyiFhswCq: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_13_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_aDaGBFBAaojGGGj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_aDaGBFBAaojGGGj .L_16_blocks_overflow_aDaGBFBAaojGGGj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_aDaGBFBAaojGGGj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mozkzBtivrcvtEk subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 
vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mozkzBtivrcvtEk .L_small_initial_partial_block_mozkzBtivrcvtEk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mozkzBtivrcvtEk: orq %r8,%r8 je .L_after_reduction_mozkzBtivrcvtEk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mozkzBtivrcvtEk: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_14_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_tAnEojledvrxyjr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_tAnEojledvrxyjr .L_16_blocks_overflow_tAnEojledvrxyjr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_tAnEojledvrxyjr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FdkjoDukspwasBA subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FdkjoDukspwasBA .L_small_initial_partial_block_FdkjoDukspwasBA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FdkjoDukspwasBA: orq %r8,%r8 je .L_after_reduction_FdkjoDukspwasBA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FdkjoDukspwasBA: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_15_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_EocAcwAEiGzmbor vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EocAcwAEiGzmbor .L_16_blocks_overflow_EocAcwAEiGzmbor: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 
vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EocAcwAEiGzmbor: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_ioeijxfuGydnlim subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ioeijxfuGydnlim .L_small_initial_partial_block_ioeijxfuGydnlim: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ioeijxfuGydnlim: orq %r8,%r8 je .L_after_reduction_ioeijxfuGydnlim vpxorq %xmm7,%xmm14,%xmm14 
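/*
 * xmm14 carries the running GHASH state out of each stanza; the reflected
 * value of the final block (xmm7) is mixed in conditionally just above
 * before control rejoins the common .L_last_blocks_done_* exit.
 */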
.L_after_reduction_ioeijxfuGydnlim: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_16_nqBvobwmcxocojb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_uDqoqnyAqaujFth vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_uDqoqnyAqaujFth .L_16_blocks_overflow_uDqoqnyAqaujFth: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_uDqoqnyAqaujFth: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_rpjttlmmCtxqtrD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rpjttlmmCtxqtrD: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rpjttlmmCtxqtrD: jmp .L_last_blocks_done_nqBvobwmcxocojb .L_last_num_blocks_is_0_nqBvobwmcxocojb: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 
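/*
 * The .L_last_num_blocks_is_0 stanza has no counter blocks left to encrypt:
 * it only finishes GHASH over the sixteen byte-reflected blocks saved at
 * 768..960(%rsp), multiplying them by the hash-key powers cached at
 * 0..192(%rsp,%rbx,1), then reduces the result into xmm14 below.
 */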
vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_nqBvobwmcxocojb: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pdDdEbGtmhbgzzj .L_encrypt_16_blocks_pdDdEbGtmhbgzzj: cmpb $240,%r15b jae .L_16_blocks_overflow_mlfnqsfcdbpAAfz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_mlfnqsfcdbpAAfz .L_16_blocks_overflow_mlfnqsfcdbpAAfz: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_mlfnqsfcdbpAAfz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq 
%zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_hommwsmBDghhsCD cmpl $8,%r10d je .L_last_num_blocks_is_8_hommwsmBDghhsCD jb .L_last_num_blocks_is_7_1_hommwsmBDghhsCD cmpl $12,%r10d je .L_last_num_blocks_is_12_hommwsmBDghhsCD jb .L_last_num_blocks_is_11_9_hommwsmBDghhsCD cmpl $15,%r10d je .L_last_num_blocks_is_15_hommwsmBDghhsCD ja .L_last_num_blocks_is_16_hommwsmBDghhsCD cmpl $14,%r10d je .L_last_num_blocks_is_14_hommwsmBDghhsCD jmp .L_last_num_blocks_is_13_hommwsmBDghhsCD .L_last_num_blocks_is_11_9_hommwsmBDghhsCD: cmpl $10,%r10d je .L_last_num_blocks_is_10_hommwsmBDghhsCD ja .L_last_num_blocks_is_11_hommwsmBDghhsCD jmp .L_last_num_blocks_is_9_hommwsmBDghhsCD .L_last_num_blocks_is_7_1_hommwsmBDghhsCD: cmpl $4,%r10d je .L_last_num_blocks_is_4_hommwsmBDghhsCD jb .L_last_num_blocks_is_3_1_hommwsmBDghhsCD cmpl $6,%r10d ja .L_last_num_blocks_is_7_hommwsmBDghhsCD je .L_last_num_blocks_is_6_hommwsmBDghhsCD jmp .L_last_num_blocks_is_5_hommwsmBDghhsCD .L_last_num_blocks_is_3_1_hommwsmBDghhsCD: cmpl $2,%r10d ja .L_last_num_blocks_is_3_hommwsmBDghhsCD je .L_last_num_blocks_is_2_hommwsmBDghhsCD .L_last_num_blocks_is_1_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_mgEtuxommfhprEy vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_mgEtuxommfhprEy 
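/*
 * .L_16_blocks_overflow_*: taken when adding the block count would carry
 * out of the low byte of the big-endian counter (the cmpl against %r15d
 * above).  The counter block is byte-swapped with the shuffle mask in
 * zmm29, incremented with full 32-bit adds from ddq_add_1234/ddq_add_4444,
 * and swapped back; the fast path simply adds the pre-positioned increment
 * constants in zmm27/zmm28 to the counters in place.
 */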
.L_16_blocks_overflow_mgEtuxommfhprEy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_mgEtuxommfhprEy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_hmAEtdvbxtuofqt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hmAEtdvbxtuofqt .L_small_initial_partial_block_hmAEtdvbxtuofqt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_hmAEtdvbxtuofqt .L_small_initial_compute_done_hmAEtdvbxtuofqt: .L_after_reduction_hmAEtdvbxtuofqt: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_2_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_eunligEgprqxzEB vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_eunligEgprqxzEB .L_16_blocks_overflow_eunligEgprqxzEB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_eunligEgprqxzEB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb 
%ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CpCtmyiCpxeyqBF subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CpCtmyiCpxeyqBF .L_small_initial_partial_block_CpCtmyiCpxeyqBF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CpCtmyiCpxeyqBF: orq %r8,%r8 je .L_after_reduction_CpCtmyiCpxeyqBF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CpCtmyiCpxeyqBF: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_3_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_tCygkraciCitCxE vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_tCygkraciCitCxE .L_16_blocks_overflow_tCygkraciCitCxE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_tCygkraciCitCxE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 
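/*
 * Each .L_last_num_blocks_is_N_hommwsmBDghhsCD stanza handles an N-block
 * tail after a full 16-block pass: it encrypts N more counter blocks (the
 * last one under the %k1 byte mask from byte64_len_to_mask_table) while
 * hashing the sixteen ciphertext blocks saved at 1280..1472(%rsp) against
 * the hash-key powers cached at 512..704(%rsp).
 */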
vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oscyleCtgoefssq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oscyleCtgoefssq .L_small_initial_partial_block_oscyleCtgoefssq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oscyleCtgoefssq: orq %r8,%r8 je .L_after_reduction_oscyleCtgoefssq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oscyleCtgoefssq: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_4_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_nkuGqpqvsuAfkpy vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_nkuGqpqvsuAfkpy .L_16_blocks_overflow_nkuGqpqvsuAfkpy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_nkuGqpqvsuAfkpy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bszjeCzlpihayrq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bszjeCzlpihayrq .L_small_initial_partial_block_bszjeCzlpihayrq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bszjeCzlpihayrq: orq %r8,%r8 je .L_after_reduction_bszjeCzlpihayrq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bszjeCzlpihayrq: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_5_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_yBohCFkvcahhcEE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_yBohCFkvcahhcEE .L_16_blocks_overflow_yBohCFkvcahhcEE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_yBohCFkvcahhcEE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_peyrCumyCvjyexD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_peyrCumyCvjyexD .L_small_initial_partial_block_peyrCumyCvjyexD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 
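/*
 * GHASH reduction pattern used throughout: the 256-bit carry-less product
 * is split with vpsrldq/vpslldq, the halves are folded together, and a
 * short sequence of vpclmulqdq multiplications by the POLY2 constant
 * reduces the result modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1,
 * leaving the 128-bit hash in xmm14.
 */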
vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_peyrCumyCvjyexD: orq %r8,%r8 je .L_after_reduction_peyrCumyCvjyexD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_peyrCumyCvjyexD: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_6_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_efCkGsdFqsctEDl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_efCkGsdFqsctEDl .L_16_blocks_overflow_efCkGsdFqsctEDl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_efCkGsdFqsctEDl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nolBDipDBhtrDmb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nolBDipDBhtrDmb .L_small_initial_partial_block_nolBDipDBhtrDmb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nolBDipDBhtrDmb: orq %r8,%r8 je .L_after_reduction_nolBDipDBhtrDmb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nolBDipDBhtrDmb: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_7_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 
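/* Tail handler for a 7-block remainder (editorial comment, describing the generated
   code that follows): the movq/subq/kmovq sequence below builds the byte mask k1
   from the remaining length in %r8 for the final partial 64-byte load/store; the
   counter blocks are then formed (taking the byte-swapped-add path when the low
   counter byte in %r15d would wrap), the AES-128 rounds from the key schedule at
   (%rdi) are interleaved with vpclmulqdq GHASH multiplies of the blocks saved on
   the stack, and the result is reduced modulo POLY2 into %xmm14. */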
movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_uGpnccromgjsdor vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_uGpnccromgjsdor .L_16_blocks_overflow_uGpnccromgjsdor: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_uGpnccromgjsdor: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 
1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wFFpDbecxxomBhl subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wFFpDbecxxomBhl .L_small_initial_partial_block_wFFpDbecxxomBhl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wFFpDbecxxomBhl: orq %r8,%r8 je .L_after_reduction_wFFpDbecxxomBhl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wFFpDbecxxomBhl: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_8_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_rCxvxGCqotFabFi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_rCxvxGCqotFabFi .L_16_blocks_overflow_rCxvxGCqotFabFi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_rCxvxGCqotFabFi: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GfamjmilndFvzhv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GfamjmilndFvzhv .L_small_initial_partial_block_GfamjmilndFvzhv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GfamjmilndFvzhv: orq %r8,%r8 je .L_after_reduction_GfamjmilndFvzhv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GfamjmilndFvzhv: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_9_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_ycGahwjqkughsCy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_ycGahwjqkughsCy .L_16_blocks_overflow_ycGahwjqkughsCy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_ycGahwjqkughsCy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oodBdsqrimpGlcx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oodBdsqrimpGlcx .L_small_initial_partial_block_oodBdsqrimpGlcx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oodBdsqrimpGlcx: orq %r8,%r8 je .L_after_reduction_oodBdsqrimpGlcx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oodBdsqrimpGlcx: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_10_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_qvAdocAzEtlnyGa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_qvAdocAzEtlnyGa .L_16_blocks_overflow_qvAdocAzEtlnyGa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_qvAdocAzEtlnyGa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sDpafzbwGCbyCCy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sDpafzbwGCbyCCy .L_small_initial_partial_block_sDpafzbwGCbyCCy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sDpafzbwGCbyCCy: orq %r8,%r8 je .L_after_reduction_sDpafzbwGCbyCCy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sDpafzbwGCbyCCy: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_11_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_BGwcgjgblbFBkyn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_BGwcgjgblbFBkyn .L_16_blocks_overflow_BGwcgjgblbFBkyn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_BGwcgjgblbFBkyn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
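/* (editorial comment) The AES rounds continue below with the round key broadcast
   from 64(%rdi); each vaesenc triple advances the three counter-block groups
   (%zmm0, %zmm3, %zmm4) by one round while the interleaved vpclmulqdq
   instructions accumulate the GHASH partial products for later reduction. */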
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oDmcaDazcjvlCqo subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oDmcaDazcjvlCqo .L_small_initial_partial_block_oDmcaDazcjvlCqo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oDmcaDazcjvlCqo: orq %r8,%r8 je .L_after_reduction_oDmcaDazcjvlCqo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oDmcaDazcjvlCqo: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_12_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_ooGtexyxfikBFDA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ooGtexyxfikBFDA .L_16_blocks_overflow_ooGtexyxfikBFDA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ooGtexyxfikBFDA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hawFrugxuDsFkwh subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hawFrugxuDsFkwh .L_small_initial_partial_block_hawFrugxuDsFkwh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hawFrugxuDsFkwh: orq %r8,%r8 je .L_after_reduction_hawFrugxuDsFkwh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hawFrugxuDsFkwh: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_13_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_ffjezAuFCnhGagx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_ffjezAuFCnhGagx .L_16_blocks_overflow_ffjezAuFCnhGagx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_ffjezAuFCnhGagx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_nszsngmcgAavfgo subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nszsngmcgAavfgo .L_small_initial_partial_block_nszsngmcgAavfgo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nszsngmcgAavfgo: orq %r8,%r8 je .L_after_reduction_nszsngmcgAavfgo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nszsngmcgAavfgo: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_14_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_CfdCFDnjwhDDuze vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 
vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_CfdCFDnjwhDDuze .L_16_blocks_overflow_CfdCFDnjwhDDuze: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_CfdCFDnjwhDDuze: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq 
$0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nnhzacbBeBgBwss subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nnhzacbBeBgBwss .L_small_initial_partial_block_nnhzacbBeBgBwss: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nnhzacbBeBgBwss: orq %r8,%r8 je .L_after_reduction_nnhzacbBeBgBwss vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nnhzacbBeBgBwss: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_15_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_GAcGndzbDEvCwfz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_GAcGndzbDEvCwfz .L_16_blocks_overflow_GAcGndzbDEvCwfz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_GAcGndzbDEvCwfz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
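/* GHASH partial products: previous ciphertext blocks saved on the stack are multiplied by precomputed hash-key powers, interleaved with the AES-CTR rounds */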
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kpsoetidpdjlnwh subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kpsoetidpdjlnwh .L_small_initial_partial_block_kpsoetidpdjlnwh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kpsoetidpdjlnwh: orq %r8,%r8 je .L_after_reduction_kpsoetidpdjlnwh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kpsoetidpdjlnwh: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_16_hommwsmBDghhsCD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_wpowiymzckfpmlc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wpowiymzckfpmlc .L_16_blocks_overflow_wpowiymzckfpmlc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wpowiymzckfpmlc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_xjewDEdrojAwizl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 
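/* multiply the final ciphertext blocks by the matching hash-key powers from the precomputed table at (%rsi) */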
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xjewDEdrojAwizl: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xjewDEdrojAwizl: jmp .L_last_blocks_done_hommwsmBDghhsCD .L_last_num_blocks_is_0_hommwsmBDghhsCD: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_hommwsmBDghhsCD: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pdDdEbGtmhbgzzj .L_message_below_32_blocks_pdDdEbGtmhbgzzj: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_zxFmdGhwegjCAGr vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 
576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_zxFmdGhwegjCAGr: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_yEtjCjlkazyuxae cmpl $8,%r10d je .L_last_num_blocks_is_8_yEtjCjlkazyuxae jb .L_last_num_blocks_is_7_1_yEtjCjlkazyuxae cmpl $12,%r10d je .L_last_num_blocks_is_12_yEtjCjlkazyuxae jb .L_last_num_blocks_is_11_9_yEtjCjlkazyuxae cmpl $15,%r10d je .L_last_num_blocks_is_15_yEtjCjlkazyuxae ja .L_last_num_blocks_is_16_yEtjCjlkazyuxae cmpl $14,%r10d je .L_last_num_blocks_is_14_yEtjCjlkazyuxae jmp .L_last_num_blocks_is_13_yEtjCjlkazyuxae .L_last_num_blocks_is_11_9_yEtjCjlkazyuxae: cmpl $10,%r10d je .L_last_num_blocks_is_10_yEtjCjlkazyuxae ja .L_last_num_blocks_is_11_yEtjCjlkazyuxae jmp .L_last_num_blocks_is_9_yEtjCjlkazyuxae .L_last_num_blocks_is_7_1_yEtjCjlkazyuxae: cmpl $4,%r10d je .L_last_num_blocks_is_4_yEtjCjlkazyuxae jb .L_last_num_blocks_is_3_1_yEtjCjlkazyuxae cmpl $6,%r10d ja .L_last_num_blocks_is_7_yEtjCjlkazyuxae je .L_last_num_blocks_is_6_yEtjCjlkazyuxae jmp .L_last_num_blocks_is_5_yEtjCjlkazyuxae .L_last_num_blocks_is_3_1_yEtjCjlkazyuxae: cmpl $2,%r10d ja .L_last_num_blocks_is_3_yEtjCjlkazyuxae je .L_last_num_blocks_is_2_yEtjCjlkazyuxae .L_last_num_blocks_is_1_yEtjCjlkazyuxae: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_GemCxiwxneizpok vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_GemCxiwxneizpok .L_16_blocks_overflow_GemCxiwxneizpok: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_GemCxiwxneizpok: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_lDxtxBkDCvCDeAu subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lDxtxBkDCvCDeAu .L_small_initial_partial_block_lDxtxBkDCvCDeAu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 
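/* fold the accumulated GHASH sums horizontally, then reduce modulo the GCM polynomial via the POLY2 constant */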
vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_lDxtxBkDCvCDeAu .L_small_initial_compute_done_lDxtxBkDCvCDeAu: .L_after_reduction_lDxtxBkDCvCDeAu: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_2_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_mtbzanedDzblhBt vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_mtbzanedDzblhBt .L_16_blocks_overflow_mtbzanedDzblhBt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_mtbzanedDzblhBt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vDfEzdpCaoutqpk subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq 
%zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vDfEzdpCaoutqpk .L_small_initial_partial_block_vDfEzdpCaoutqpk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vDfEzdpCaoutqpk: orq %r8,%r8 je .L_after_reduction_vDfEzdpCaoutqpk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vDfEzdpCaoutqpk: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_3_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_dEDrjDhcyydvacb vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_dEDrjDhcyydvacb .L_16_blocks_overflow_dEDrjDhcyydvacb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_dEDrjDhcyydvacb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc 
%zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ulcxboFccGvxqoA subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ulcxboFccGvxqoA .L_small_initial_partial_block_ulcxboFccGvxqoA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ulcxboFccGvxqoA: orq %r8,%r8 je .L_after_reduction_ulcxboFccGvxqoA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ulcxboFccGvxqoA: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_4_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_oDxtFmsewqDacsh vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_oDxtFmsewqDacsh .L_16_blocks_overflow_oDxtFmsewqDacsh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_oDxtFmsewqDacsh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 
16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vugvwEfszCpbGFf subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vugvwEfszCpbGFf .L_small_initial_partial_block_vugvwEfszCpbGFf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vugvwEfszCpbGFf: orq %r8,%r8 je .L_after_reduction_vugvwEfszCpbGFf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vugvwEfszCpbGFf: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_5_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_xkcGkGACdgyhfnk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_xkcGkGACdgyhfnk .L_16_blocks_overflow_xkcGkGACdgyhfnk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_xkcGkGACdgyhfnk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 
$0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ztfihBbCfBvyfov subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ztfihBbCfBvyfov .L_small_initial_partial_block_ztfihBbCfBvyfov: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ztfihBbCfBvyfov: orq %r8,%r8 je .L_after_reduction_ztfihBbCfBvyfov vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ztfihBbCfBvyfov: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_6_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_xlFpBxEfzmCmemF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_xlFpBxEfzmCmemF .L_16_blocks_overflow_xlFpBxEfzmCmemF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_xlFpBxEfzmCmemF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 
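/* broadcast the next AES round key while the GHASH carry-less multiplications continue */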
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lxGrFedjGdoqthf subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lxGrFedjGdoqthf .L_small_initial_partial_block_lxGrFedjGdoqthf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq 
$0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lxGrFedjGdoqthf: orq %r8,%r8 je .L_after_reduction_lxGrFedjGdoqthf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lxGrFedjGdoqthf: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_7_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_epvGyiwrthhFeDk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_epvGyiwrthhFeDk .L_16_blocks_overflow_epvGyiwrthhFeDk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_epvGyiwrthhFeDk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
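/* final AES round: load the last round key and finish the counter blocks with vaesenclast */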
vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lDmxfclvwFuFuGn subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lDmxfclvwFuFuGn .L_small_initial_partial_block_lDmxfclvwFuFuGn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lDmxfclvwFuFuGn: orq %r8,%r8 je .L_after_reduction_lDmxfclvwFuFuGn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lDmxfclvwFuFuGn: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_8_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_rlpnCjhhrhBjnBv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_rlpnCjhhrhBjnBv .L_16_blocks_overflow_rlpnCjhhrhBjnBv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 
vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_rlpnCjhhrhBjnBv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wCmlnxlmuAqfmku subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 
vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wCmlnxlmuAqfmku .L_small_initial_partial_block_wCmlnxlmuAqfmku: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wCmlnxlmuAqfmku: orq %r8,%r8 je .L_after_reduction_wCmlnxlmuAqfmku vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wCmlnxlmuAqfmku: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_9_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_xGcqvoGCBlCvFjF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_xGcqvoGCBlCvFjF .L_16_blocks_overflow_xGcqvoGCBlCvFjF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_xGcqvoGCBlCvFjF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uoAmEEFbAhessra subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uoAmEEFbAhessra .L_small_initial_partial_block_uoAmEEFbAhessra: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uoAmEEFbAhessra: orq %r8,%r8 je .L_after_reduction_uoAmEEFbAhessra vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uoAmEEFbAhessra: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_10_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_lxwlEahBzykFvop vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_lxwlEahBzykFvop .L_16_blocks_overflow_lxwlEahBzykFvop: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_lxwlEahBzykFvop: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ymGqwwcaDlhrzht subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ymGqwwcaDlhrzht .L_small_initial_partial_block_ymGqwwcaDlhrzht: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ymGqwwcaDlhrzht: orq %r8,%r8 je .L_after_reduction_ymGqwwcaDlhrzht vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ymGqwwcaDlhrzht: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_11_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_DwphDuBmGjsjgos vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_DwphDuBmGjsjgos .L_16_blocks_overflow_DwphDuBmGjsjgos: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_DwphDuBmGjsjgos: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 
144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_feadFtsqxgxipCv subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_feadFtsqxgxipCv .L_small_initial_partial_block_feadFtsqxgxipCv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_feadFtsqxgxipCv: orq %r8,%r8 je .L_after_reduction_feadFtsqxgxipCv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_feadFtsqxgxipCv: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_12_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_tysgGmlzxDCuchk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_tysgGmlzxDCuchk .L_16_blocks_overflow_tysgGmlzxDCuchk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_tysgGmlzxDCuchk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jdvGApyCGfzBhpb subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jdvGApyCGfzBhpb .L_small_initial_partial_block_jdvGApyCGfzBhpb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jdvGApyCGfzBhpb: orq %r8,%r8 je .L_after_reduction_jdvGApyCGfzBhpb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jdvGApyCGfzBhpb: jmp .L_last_blocks_done_yEtjCjlkazyuxae 
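/*
 * Reader's note (not emitted by the generator): the tail cases for 13..16
 * remaining 128-bit blocks follow. Each path appears to build up to four
 * counter vectors (zmm0/zmm3/zmm4/zmm5, falling back to the ddq_add_*
 * constants when the low counter byte would wrap), runs them through the
 * round keys broadcast from 0(%rdi)..160(%rdi) (eleven keys, i.e. the
 * AES-128 schedule in this specialization), XORs the keystream with the
 * masked source at (%rcx,%r11) and stores the masked result at (%r9,%r11),
 * then byte-swaps the processed blocks and folds them into the GHASH
 * accumulator %xmm14 using the cached hash-key powers at (%rsi) and a final
 * POLY2(%rip) reduction.
 */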
.L_last_num_blocks_is_13_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_halbrdjstkvuogl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_halbrdjstkvuogl .L_16_blocks_overflow_halbrdjstkvuogl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_halbrdjstkvuogl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pdxowiCmkqsedqs subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pdxowiCmkqsedqs .L_small_initial_partial_block_pdxowiCmkqsedqs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pdxowiCmkqsedqs: orq %r8,%r8 je .L_after_reduction_pdxowiCmkqsedqs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pdxowiCmkqsedqs: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_14_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_qlykidCbnDmCaom vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_qlykidCbnDmCaom .L_16_blocks_overflow_qlykidCbnDmCaom: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_qlykidCbnDmCaom: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bCGuxGwffFmkxlq subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bCGuxGwffFmkxlq .L_small_initial_partial_block_bCGuxGwffFmkxlq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bCGuxGwffFmkxlq: orq %r8,%r8 je .L_after_reduction_bCGuxGwffFmkxlq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bCGuxGwffFmkxlq: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_15_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_tvonowlqiEmbpqm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_tvonowlqiEmbpqm .L_16_blocks_overflow_tvonowlqiEmbpqm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_tvonowlqiEmbpqm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 
vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dlvvxnvpiqivacr subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dlvvxnvpiqivacr .L_small_initial_partial_block_dlvvxnvpiqivacr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 
vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dlvvxnvpiqivacr: orq %r8,%r8 je .L_after_reduction_dlvvxnvpiqivacr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dlvvxnvpiqivacr: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_16_yEtjCjlkazyuxae: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_nqzepvdnfxxrztt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_nqzepvdnfxxrztt .L_16_blocks_overflow_nqzepvdnfxxrztt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_nqzepvdnfxxrztt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_bBybkCcjjhhjGnD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bBybkCcjjhhjGnD: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bBybkCcjjhhjGnD: jmp .L_last_blocks_done_yEtjCjlkazyuxae .L_last_num_blocks_is_0_yEtjCjlkazyuxae: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_yEtjCjlkazyuxae: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pdDdEbGtmhbgzzj .L_message_below_equal_16_blocks_pdDdEbGtmhbgzzj: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_ewuGsEvelaCkirh jl .L_small_initial_num_blocks_is_7_1_ewuGsEvelaCkirh cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_ewuGsEvelaCkirh jl .L_small_initial_num_blocks_is_11_9_ewuGsEvelaCkirh cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_ewuGsEvelaCkirh cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_ewuGsEvelaCkirh cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_ewuGsEvelaCkirh jmp .L_small_initial_num_blocks_is_13_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_11_9_ewuGsEvelaCkirh: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_ewuGsEvelaCkirh cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_ewuGsEvelaCkirh jmp .L_small_initial_num_blocks_is_9_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_7_1_ewuGsEvelaCkirh: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_ewuGsEvelaCkirh jl .L_small_initial_num_blocks_is_3_1_ewuGsEvelaCkirh cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_ewuGsEvelaCkirh cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_ewuGsEvelaCkirh jmp .L_small_initial_num_blocks_is_5_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_3_1_ewuGsEvelaCkirh: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_ewuGsEvelaCkirh cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_1_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_qAfhfumcaDjruco subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qAfhfumcaDjruco .L_small_initial_partial_block_qAfhfumcaDjruco: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_qAfhfumcaDjruco .L_small_initial_compute_done_qAfhfumcaDjruco: .L_after_reduction_qAfhfumcaDjruco: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_2_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ubuBFaxsGrnemfF subq $16,%r8 movq $0,(%rdx) 
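/* 2-block tail with no partial block: fold the running GHASH value into the
   byte-reflected output, multiply by the hash-key powers H^2:H^1 cached at
   320(%rsi), and reduce modulo the GHASH polynomial (POLY2) into %xmm14. */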
vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ubuBFaxsGrnemfF .L_small_initial_partial_block_ubuBFaxsGrnemfF: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ubuBFaxsGrnemfF: orq %r8,%r8 je .L_after_reduction_ubuBFaxsGrnemfF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ubuBFaxsGrnemfF: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_3_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ndaAlsscEjpEkoq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ndaAlsscEjpEkoq .L_small_initial_partial_block_ndaAlsscEjpEkoq: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ndaAlsscEjpEkoq: orq %r8,%r8 je .L_after_reduction_ndaAlsscEjpEkoq vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ndaAlsscEjpEkoq: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_4_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jktiGoAbGDiFkaq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jktiGoAbGDiFkaq .L_small_initial_partial_block_jktiGoAbGDiFkaq: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jktiGoAbGDiFkaq: orq %r8,%r8 je .L_after_reduction_jktiGoAbGDiFkaq vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jktiGoAbGDiFkaq: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_5_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sEqEFsxphmltbmr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq 
$0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sEqEFsxphmltbmr .L_small_initial_partial_block_sEqEFsxphmltbmr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sEqEFsxphmltbmr: orq %r8,%r8 je .L_after_reduction_sEqEFsxphmltbmr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_sEqEFsxphmltbmr: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_6_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) 
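/* 6-block tail: one full 64-byte store plus a byte-masked (%k1) store for the
   final, possibly partial block, then byte-reflect the output for the GHASH
   update that follows. */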
vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_slpocbFrpsoiAib subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_slpocbFrpsoiAib .L_small_initial_partial_block_slpocbFrpsoiAib: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_slpocbFrpsoiAib: orq %r8,%r8 je .L_after_reduction_slpocbFrpsoiAib vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_slpocbFrpsoiAib: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_7_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EEknGefGCzrkolw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EEknGefGCzrkolw .L_small_initial_partial_block_EEknGefGCzrkolw: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EEknGefGCzrkolw: orq %r8,%r8 je .L_after_reduction_EEknGefGCzrkolw vpxorq %xmm13,%xmm14,%xmm14 
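/* Each .L_small_initial_num_blocks_is_N_* branch handles an N-block (N <= 16)
   tail the same way: build N counter blocks, run the AES rounds from the key
   schedule at (%rdi) (an AES-128 schedule here, keys 0..160), XOR with the
   byte-masked input, store the result, and fold it into the GHASH accumulator
   %xmm14 using the matching hash-key powers from the table at (%rsi). */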
.L_after_reduction_EEknGefGCzrkolw: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_8_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qrgmfxpdazygeCe subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qrgmfxpdazygeCe .L_small_initial_partial_block_qrgmfxpdazygeCe: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq 
%zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qrgmfxpdazygeCe: orq %r8,%r8 je .L_after_reduction_qrgmfxpdazygeCe vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_qrgmfxpdazygeCe: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_9_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ixdohjdwtejkAah subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq 
%zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ixdohjdwtejkAah .L_small_initial_partial_block_ixdohjdwtejkAah: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ixdohjdwtejkAah: orq %r8,%r8 je .L_after_reduction_ixdohjdwtejkAah vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ixdohjdwtejkAah: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_10_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 
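/* 10-block tail, remaining AES-128 rounds: keys 80(%rdi)..144(%rdi) via
   vaesenc and 160(%rdi) via vaesenclast, applied to the two full ZMM counter
   groups and the YMM pair before the masked store and GHASH update. */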
vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kdvEyrakCtlldFt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kdvEyrakCtlldFt .L_small_initial_partial_block_kdvEyrakCtlldFt: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kdvEyrakCtlldFt: orq %r8,%r8 je .L_after_reduction_kdvEyrakCtlldFt vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_kdvEyrakCtlldFt: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_11_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cutxzwGkeBggDqx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq 
$0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cutxzwGkeBggDqx .L_small_initial_partial_block_cutxzwGkeBggDqx: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cutxzwGkeBggDqx: orq %r8,%r8 je .L_after_reduction_cutxzwGkeBggDqx vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_cutxzwGkeBggDqx: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_12_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oqFnyhhlpeztanE subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oqFnyhhlpeztanE .L_small_initial_partial_block_oqFnyhhlpeztanE: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq 
%zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oqFnyhhlpeztanE: orq %r8,%r8 je .L_after_reduction_oqFnyhhlpeztanE vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_oqFnyhhlpeztanE: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_13_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_mloEfjmpzzECCFk subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mloEfjmpzzECCFk .L_small_initial_partial_block_mloEfjmpzzECCFk: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mloEfjmpzzECCFk: orq %r8,%r8 je .L_after_reduction_mloEfjmpzzECCFk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mloEfjmpzzECCFk: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_14_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd 
ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lokFbqCpdpswyxF subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lokFbqCpdpswyxF .L_small_initial_partial_block_lokFbqCpdpswyxF: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lokFbqCpdpswyxF: orq %r8,%r8 je .L_after_reduction_lokFbqCpdpswyxF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_lokFbqCpdpswyxF: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_15_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 
vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bmnsCorxdnheyAb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bmnsCorxdnheyAb .L_small_initial_partial_block_bmnsCorxdnheyAb: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 
vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bmnsCorxdnheyAb: orq %r8,%r8 je .L_after_reduction_bmnsCorxdnheyAb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_bmnsCorxdnheyAb: jmp .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh .L_small_initial_num_blocks_is_16_ewuGsEvelaCkirh: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_iGnlhalqoGhdkbv: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iGnlhalqoGhdkbv: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_iGnlhalqoGhdkbv: .L_small_initial_blocks_encrypted_ewuGsEvelaCkirh: .L_ghash_done_pdDdEbGtmhbgzzj: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_pdDdEbGtmhbgzzj: jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_done_tFbkipsuzBAeEGF xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_jdCiCmGpmghGfDo movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_jdCiCmGpmghGfDo subq %r13,%r12 .L_no_extra_mask_jdCiCmGpmghGfDo: vmovdqu64 16(%r12),%xmm0 vpand 
%xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_jdCiCmGpmghGfDo vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_jdCiCmGpmghGfDo .L_partial_incomplete_jdCiCmGpmghGfDo: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_jdCiCmGpmghGfDo: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_jdCiCmGpmghGfDo: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_tFbkipsuzBAeEGF cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_tFbkipsuzBAeEGF vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_pFvraahbaffuyct vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_pFvraahbaffuyct .L_next_16_overflow_pFvraahbaffuyct: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_pFvraahbaffuyct: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc 
%zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_yenzjhtagtpjklu vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_yenzjhtagtpjklu: cmpq $512,%r8 jb .L_message_below_32_blocks_tFbkipsuzBAeEGF cmpb $240,%r15b jae .L_next_16_overflow_enCpGzovkqzhwzc vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_enCpGzovkqzhwzc .L_next_16_overflow_enCpGzovkqzhwzc: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_enCpGzovkqzhwzc: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 
vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_jqGvtcbttbiaDxy vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq 
$8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_jqGvtcbttbiaDxy: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_tFbkipsuzBAeEGF .L_encrypt_big_nblocks_tFbkipsuzBAeEGF: cmpb $240,%r15b jae .L_16_blocks_overflow_jddBEjFhbsBAmmE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jddBEjFhbsBAmmE .L_16_blocks_overflow_jddBEjFhbsBAmmE: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jddBEjFhbsBAmmE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_idpAqFqszdhymlh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_idpAqFqszdhymlh .L_16_blocks_overflow_idpAqFqszdhymlh: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_idpAqFqszdhymlh: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_EFGAxoobnnGywoA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EFGAxoobnnGywoA 
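/*
 * Annotation (not part of the generated output): third 16-block batch of the
 * 48-block (768-byte) main encrypt loop in the AES-192 path.  As in the two
 * batches above, the counter's low byte tracked in %r15b is compared against
 * 240 so the vectorized big-endian increment cannot wrap; on overflow the
 * counter blocks are byte-swapped, bumped with the little-endian ddq_add_*
 * constants, and swapped back.  The AES rounds (round keys broadcast from
 * 0(%rdi) through 192(%rdi)) are interleaved with GHASH multiplies of the
 * ciphertext blocks cached at 1280..1472(%rsp) against the key powers cached
 * at 512..704(%rsp); the accumulated partial products in %zmm24/%zmm25/%zmm26
 * are then folded and reduced with POLY2, leaving the updated hash in %zmm14
 * before %r11/%r8 are advanced by 768.
 */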
.L_16_blocks_overflow_EFGAxoobnnGywoA: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EFGAxoobnnGywoA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_tFbkipsuzBAeEGF .L_no_more_big_nblocks_tFbkipsuzBAeEGF: cmpq $512,%r8 jae .L_encrypt_32_blocks_tFbkipsuzBAeEGF cmpq $256,%r8 jae .L_encrypt_16_blocks_tFbkipsuzBAeEGF .L_encrypt_0_blocks_ghash_32_tFbkipsuzBAeEGF: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_uFjiwCxmGEbfAFa cmpl $8,%r10d je .L_last_num_blocks_is_8_uFjiwCxmGEbfAFa jb .L_last_num_blocks_is_7_1_uFjiwCxmGEbfAFa cmpl $12,%r10d je .L_last_num_blocks_is_12_uFjiwCxmGEbfAFa jb .L_last_num_blocks_is_11_9_uFjiwCxmGEbfAFa cmpl $15,%r10d je .L_last_num_blocks_is_15_uFjiwCxmGEbfAFa ja .L_last_num_blocks_is_16_uFjiwCxmGEbfAFa cmpl $14,%r10d je .L_last_num_blocks_is_14_uFjiwCxmGEbfAFa jmp .L_last_num_blocks_is_13_uFjiwCxmGEbfAFa .L_last_num_blocks_is_11_9_uFjiwCxmGEbfAFa: cmpl $10,%r10d je .L_last_num_blocks_is_10_uFjiwCxmGEbfAFa ja .L_last_num_blocks_is_11_uFjiwCxmGEbfAFa jmp .L_last_num_blocks_is_9_uFjiwCxmGEbfAFa .L_last_num_blocks_is_7_1_uFjiwCxmGEbfAFa: cmpl $4,%r10d je .L_last_num_blocks_is_4_uFjiwCxmGEbfAFa jb .L_last_num_blocks_is_3_1_uFjiwCxmGEbfAFa cmpl $6,%r10d ja .L_last_num_blocks_is_7_uFjiwCxmGEbfAFa je .L_last_num_blocks_is_6_uFjiwCxmGEbfAFa jmp .L_last_num_blocks_is_5_uFjiwCxmGEbfAFa .L_last_num_blocks_is_3_1_uFjiwCxmGEbfAFa: cmpl $2,%r10d ja 
.L_last_num_blocks_is_3_uFjiwCxmGEbfAFa je .L_last_num_blocks_is_2_uFjiwCxmGEbfAFa .L_last_num_blocks_is_1_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_lxdjeCteCnqypuE vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_lxdjeCteCnqypuE .L_16_blocks_overflow_lxdjeCteCnqypuE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_lxdjeCteCnqypuE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_xokBAycvbkevxfE subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xokBAycvbkevxfE .L_small_initial_partial_block_xokBAycvbkevxfE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_xokBAycvbkevxfE .L_small_initial_compute_done_xokBAycvbkevxfE: .L_after_reduction_xokBAycvbkevxfE: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_2_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_tqAdjGAqcxebbGj vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_tqAdjGAqcxebbGj .L_16_blocks_overflow_tqAdjGAqcxebbGj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_tqAdjGAqcxebbGj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 
%zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_izsjBCvaDivghqe subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_izsjBCvaDivghqe .L_small_initial_partial_block_izsjBCvaDivghqe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_izsjBCvaDivghqe: orq %r8,%r8 je .L_after_reduction_izsjBCvaDivghqe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_izsjBCvaDivghqe: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_3_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_woFDjhpeDAEyeol vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_woFDjhpeDAEyeol .L_16_blocks_overflow_woFDjhpeDAEyeol: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_woFDjhpeDAEyeol: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AqCFGymmhaacFDC subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AqCFGymmhaacFDC .L_small_initial_partial_block_AqCFGymmhaacFDC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AqCFGymmhaacFDC: orq %r8,%r8 je .L_after_reduction_AqCFGymmhaacFDC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AqCFGymmhaacFDC: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa 
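/*
 * Annotation (not part of the generated output): tail handler for a remainder
 * of four blocks (up to 64 bytes).  byte64_len_to_mask_table indexed by the
 * remaining length in %r8 supplies the %k1 load/store mask, four counter
 * blocks are derived from %zmm2 (with the same low-byte overflow check on
 * %r15d), encrypted with the round keys from (%rdi), and XORed with the
 * masked input; the byte-reflected output blocks are then multiplied against
 * the key power at 288(%rsi) and reduced with POLY2 into %xmm14.  If the last
 * block is partial, the .L_small_initial_partial_block_* branch instead
 * stores the leftover byte count through %rdx and saves the last output block
 * at 16(%rsi) before performing the reduction.
 */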
.L_last_num_blocks_is_4_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_BGnDrgfdztzmBGB vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_BGnDrgfdztzmBGB .L_16_blocks_overflow_BGnDrgfdztzmBGB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_BGnDrgfdztzmBGB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uClitrxBorxFyuy subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uClitrxBorxFyuy .L_small_initial_partial_block_uClitrxBorxFyuy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uClitrxBorxFyuy: orq %r8,%r8 je .L_after_reduction_uClitrxBorxFyuy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uClitrxBorxFyuy: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_5_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_wDxAmusyyammDow vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_wDxAmusyyammDow .L_16_blocks_overflow_wDxAmusyyammDow: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_wDxAmusyyammDow: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 
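/* Annotation (not from the generator): in this five-block tail the first four
 * counter blocks live in %zmm0 and the fifth in %xmm3, which is why every AES
 * round is issued twice, once per register width; the first 64 bytes of input
 * are loaded unmasked and only the final block goes through the %k1 byte
 * mask. */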
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bosguzEFytqmFeq subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bosguzEFytqmFeq .L_small_initial_partial_block_bosguzEFytqmFeq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bosguzEFytqmFeq: orq %r8,%r8 je .L_after_reduction_bosguzEFytqmFeq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bosguzEFytqmFeq: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_6_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_sCzAAgptixxBvip vpaddd 
%zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_sCzAAgptixxBvip .L_16_blocks_overflow_sCzAAgptixxBvip: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_sCzAAgptixxBvip: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FuuimCCibwFkhfx subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FuuimCCibwFkhfx .L_small_initial_partial_block_FuuimCCibwFkhfx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FuuimCCibwFkhfx: orq %r8,%r8 je .L_after_reduction_FuuimCCibwFkhfx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FuuimCCibwFkhfx: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_7_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_gqtukwixiotlvjE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_gqtukwixiotlvjE .L_16_blocks_overflow_gqtukwixiotlvjE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_gqtukwixiotlvjE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CBkCykisCgChyAc subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CBkCykisCgChyAc .L_small_initial_partial_block_CBkCykisCgChyAc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CBkCykisCgChyAc: orq %r8,%r8 je .L_after_reduction_CBkCykisCgChyAc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CBkCykisCgChyAc: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_8_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_Fznlwzcrirmvwxw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_Fznlwzcrirmvwxw .L_16_blocks_overflow_Fznlwzcrirmvwxw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_Fznlwzcrirmvwxw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 
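/* Annotation (not from the generator, best-effort reading): the round keys in
 * these tail branches are fetched at offsets 0..192 from (%rdi), i.e. 13 round
 * keys, which matches a 192-bit AES key schedule; the generator appears to
 * emit a separate copy of this tail code for each supported key length. */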
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BszjzgFAnDlqhlr subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BszjzgFAnDlqhlr .L_small_initial_partial_block_BszjzgFAnDlqhlr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BszjzgFAnDlqhlr: orq %r8,%r8 je .L_after_reduction_BszjzgFAnDlqhlr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BszjzgFAnDlqhlr: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_9_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_nhcklxyaumrucBe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_nhcklxyaumrucBe .L_16_blocks_overflow_nhcklxyaumrucBe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_nhcklxyaumrucBe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_pofwkmqmhmpaDas subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pofwkmqmhmpaDas .L_small_initial_partial_block_pofwkmqmhmpaDas: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pofwkmqmhmpaDas: orq %r8,%r8 je .L_after_reduction_pofwkmqmhmpaDas vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pofwkmqmhmpaDas: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_10_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_DpcajcwBdqbwuEm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_DpcajcwBdqbwuEm .L_16_blocks_overflow_DpcajcwBdqbwuEm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_DpcajcwBdqbwuEm: 
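/* Annotation (not from the generator): the vpclmulqdq/vpternlogq work
 * interleaved with the vaesenc rounds below appears to continue the GHASH of
 * 16 earlier blocks parked at 0/64/128/192(%rsp,%rbx,1), multiplied against
 * the key material cached at 1024..1216(%rsp); the partial products are
 * accumulated into %zmm24, %zmm25 and %zmm26 for the final reduction. */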
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GoickdlxxlCgCmn subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GoickdlxxlCgCmn .L_small_initial_partial_block_GoickdlxxlCgCmn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GoickdlxxlCgCmn: orq %r8,%r8 je .L_after_reduction_GoickdlxxlCgCmn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GoickdlxxlCgCmn: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_11_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_CzDGlzuDofcmftE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_CzDGlzuDofcmftE .L_16_blocks_overflow_CzDGlzuDofcmftE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_CzDGlzuDofcmftE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AfGwErudvfGFkBd subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 
vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AfGwErudvfGFkBd .L_small_initial_partial_block_AfGwErudvfGFkBd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AfGwErudvfGFkBd: orq %r8,%r8 je .L_after_reduction_AfGwErudvfGFkBd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AfGwErudvfGFkBd: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_12_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_vFgtdmiGGceAuup vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_vFgtdmiGGceAuup .L_16_blocks_overflow_vFgtdmiGGceAuup: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_vFgtdmiGGceAuup: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 
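/* Annotation (not from the generator): %k1 was loaded from
 * byte64_len_to_mask_table indexed by how many bytes remain in the final
 * 64-byte vector, so the {%k1}-masked vmovdqu8 load and store of that last
 * vector touch only the bytes that are actually left in the caller's
 * buffers. */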
vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hAugcokFGbhzzvx subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hAugcokFGbhzzvx .L_small_initial_partial_block_hAugcokFGbhzzvx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hAugcokFGbhzzvx: orq %r8,%r8 je .L_after_reduction_hAugcokFGbhzzvx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hAugcokFGbhzzvx: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_13_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_ApsFAharcbobqcA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_ApsFAharcbobqcA .L_16_blocks_overflow_ApsFAharcbobqcA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_ApsFAharcbobqcA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DkdftFtqeikgrDl subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DkdftFtqeikgrDl .L_small_initial_partial_block_DkdftFtqeikgrDl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DkdftFtqeikgrDl: orq %r8,%r8 je .L_after_reduction_DkdftFtqeikgrDl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DkdftFtqeikgrDl: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_14_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_amhEEFGkEmcdfyg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_amhEEFGkEmcdfyg 
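/*
 * Tail handler for a 14-block remainder: %k1 is loaded from
 * byte64_len_to_mask_table so the final sub-64-byte load/store is masked.
 * The fast path above adds the little-endian counter increments in
 * %zmm28/%zmm27/%ymm27 directly; the overflow path below (taken when the
 * low byte of the counter in %r15d would wrap past 255) byte-swaps the
 * counter blocks with vpshufb %zmm29 so the add carries correctly, adds
 * ddq_add_1234/ddq_add_4444, and swaps back before entering the common
 * stitched AES/GHASH code at the "ok" label.
 */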
.L_16_blocks_overflow_amhEEFGkEmcdfyg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_amhEEFGkEmcdfyg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 
vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DsqdvjyjtgiDdjk subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DsqdvjyjtgiDdjk .L_small_initial_partial_block_DsqdvjyjtgiDdjk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DsqdvjyjtgiDdjk: orq %r8,%r8 je .L_after_reduction_DsqdvjyjtgiDdjk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DsqdvjyjtgiDdjk: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_15_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_GyCmDqABriaxjxf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_GyCmDqABriaxjxf .L_16_blocks_overflow_GyCmDqABriaxjxf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_GyCmDqABriaxjxf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq 
$0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pGoiupmcfezlCDb subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pGoiupmcfezlCDb .L_small_initial_partial_block_pGoiupmcfezlCDb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pGoiupmcfezlCDb: orq %r8,%r8 je .L_after_reduction_pGoiupmcfezlCDb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pGoiupmcfezlCDb: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_16_uFjiwCxmGEbfAFa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_imDahqossjyafvG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_imDahqossjyafvG .L_16_blocks_overflow_imDahqossjyafvG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_imDahqossjyafvG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_stpCjmquwqkvlEu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_stpCjmquwqkvlEu: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_stpCjmquwqkvlEu: jmp .L_last_blocks_done_uFjiwCxmGEbfAFa .L_last_num_blocks_is_0_uFjiwCxmGEbfAFa: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_uFjiwCxmGEbfAFa: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_tFbkipsuzBAeEGF .L_encrypt_32_blocks_tFbkipsuzBAeEGF: cmpb $240,%r15b jae .L_16_blocks_overflow_AGsgmucxjDjGrat vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AGsgmucxjDjGrat .L_16_blocks_overflow_AGsgmucxjDjGrat: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AGsgmucxjDjGrat: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_miCaCzFgEsdrxCb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_miCaCzFgEsdrxCb .L_16_blocks_overflow_miCaCzFgEsdrxCb: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_miCaCzFgEsdrxCb: vbroadcastf64x2 0(%rdi),%zmm30 
vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) 
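/*
 * End of the second 16-block batch of the 32-block path: the keystream has
 * been XORed with the input at 256..448(%rcx,%r11) and written to
 * 256..448(%r9,%r11), and the byte-reflected ciphertext is parked at
 * 768(%rsp)..960(%rsp) for a later GHASH pass.  The block below folds the
 * earlier batch saved at 1280(%rsp)..1472(%rsp) into the GHASH accumulators:
 * each 4-block group is multiplied by what appear to be the hash-key powers
 * cached at 512(%rsp)..704(%rsp), the partial products are combined with
 * vpternlogq $0x96 (three-way XOR) into %zmm24/%zmm25/%zmm26, and the result
 * is reduced modulo the GHASH polynomial via POLY2 into %xmm14.
 */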
vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_jcdFbiukBEavFGE cmpl $8,%r10d je .L_last_num_blocks_is_8_jcdFbiukBEavFGE jb .L_last_num_blocks_is_7_1_jcdFbiukBEavFGE cmpl $12,%r10d je .L_last_num_blocks_is_12_jcdFbiukBEavFGE jb .L_last_num_blocks_is_11_9_jcdFbiukBEavFGE cmpl $15,%r10d je .L_last_num_blocks_is_15_jcdFbiukBEavFGE ja .L_last_num_blocks_is_16_jcdFbiukBEavFGE cmpl $14,%r10d je .L_last_num_blocks_is_14_jcdFbiukBEavFGE jmp .L_last_num_blocks_is_13_jcdFbiukBEavFGE .L_last_num_blocks_is_11_9_jcdFbiukBEavFGE: cmpl $10,%r10d je .L_last_num_blocks_is_10_jcdFbiukBEavFGE ja .L_last_num_blocks_is_11_jcdFbiukBEavFGE jmp .L_last_num_blocks_is_9_jcdFbiukBEavFGE .L_last_num_blocks_is_7_1_jcdFbiukBEavFGE: cmpl $4,%r10d je .L_last_num_blocks_is_4_jcdFbiukBEavFGE jb .L_last_num_blocks_is_3_1_jcdFbiukBEavFGE cmpl $6,%r10d ja .L_last_num_blocks_is_7_jcdFbiukBEavFGE je .L_last_num_blocks_is_6_jcdFbiukBEavFGE jmp .L_last_num_blocks_is_5_jcdFbiukBEavFGE .L_last_num_blocks_is_3_1_jcdFbiukBEavFGE: cmpl $2,%r10d ja .L_last_num_blocks_is_3_jcdFbiukBEavFGE je .L_last_num_blocks_is_2_jcdFbiukBEavFGE .L_last_num_blocks_is_1_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_vxxnDcnfkrwsdjp vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_vxxnDcnfkrwsdjp .L_16_blocks_overflow_vxxnDcnfkrwsdjp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_vxxnDcnfkrwsdjp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 
vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_rjcmxpckvzxcizE subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rjcmxpckvzxcizE .L_small_initial_partial_block_rjcmxpckvzxcizE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_rjcmxpckvzxcizE .L_small_initial_compute_done_rjcmxpckvzxcizE: .L_after_reduction_rjcmxpckvzxcizE: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_2_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_uhDoynhcngzlgum vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_uhDoynhcngzlgum .L_16_blocks_overflow_uhDoynhcngzlgum: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_uhDoynhcngzlgum: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uukoDhouhnxbvBs subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uukoDhouhnxbvBs .L_small_initial_partial_block_uukoDhouhnxbvBs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uukoDhouhnxbvBs: orq %r8,%r8 je .L_after_reduction_uukoDhouhnxbvBs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uukoDhouhnxbvBs: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_3_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_uqbvqDscdfzCyvo vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_uqbvqDscdfzCyvo .L_16_blocks_overflow_uqbvqDscdfzCyvo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_uqbvqDscdfzCyvo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq 
%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AzBBwGideFptDwf subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AzBBwGideFptDwf .L_small_initial_partial_block_AzBBwGideFptDwf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AzBBwGideFptDwf: orq %r8,%r8 je .L_after_reduction_AzBBwGideFptDwf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AzBBwGideFptDwf: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_4_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_kyFozElpAosldpA vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_kyFozElpAosldpA .L_16_blocks_overflow_kyFozElpAosldpA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_kyFozElpAosldpA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cyDyceqdwxjBzzg subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cyDyceqdwxjBzzg .L_small_initial_partial_block_cyDyceqdwxjBzzg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cyDyceqdwxjBzzg: orq %r8,%r8 je .L_after_reduction_cyDyceqdwxjBzzg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cyDyceqdwxjBzzg: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_5_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_lFprftfcjilzpav vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_lFprftfcjilzpav .L_16_blocks_overflow_lFprftfcjilzpav: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_lFprftfcjilzpav: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb 
%xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pGBzEdwhzcavspd subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pGBzEdwhzcavspd .L_small_initial_partial_block_pGBzEdwhzcavspd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pGBzEdwhzcavspd: orq %r8,%r8 je .L_after_reduction_pGBzEdwhzcavspd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pGBzEdwhzcavspd: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_6_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_GkzjxqDyGdedavo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_GkzjxqDyGdedavo .L_16_blocks_overflow_GkzjxqDyGdedavo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_GkzjxqDyGdedavo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_owicnDDzeheGwrB subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_owicnDDzeheGwrB .L_small_initial_partial_block_owicnDDzeheGwrB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_owicnDDzeheGwrB: orq %r8,%r8 je .L_after_reduction_owicnDDzeheGwrB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_owicnDDzeheGwrB: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_7_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_CaCztGdjulthntc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_CaCztGdjulthntc .L_16_blocks_overflow_CaCztGdjulthntc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_CaCztGdjulthntc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq 
%zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_davwqylkhqewajl subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_davwqylkhqewajl .L_small_initial_partial_block_davwqylkhqewajl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_davwqylkhqewajl: orq %r8,%r8 je .L_after_reduction_davwqylkhqewajl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_davwqylkhqewajl: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_8_jcdFbiukBEavFGE: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_GbaqslwpsaFuoyz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_GbaqslwpsaFuoyz .L_16_blocks_overflow_GbaqslwpsaFuoyz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_GbaqslwpsaFuoyz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FelclvrviuByirb subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 
vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FelclvrviuByirb .L_small_initial_partial_block_FelclvrviuByirb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FelclvrviuByirb: orq %r8,%r8 je .L_after_reduction_FelclvrviuByirb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FelclvrviuByirb: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_9_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_AplsctBswkCkEgg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_AplsctBswkCkEgg .L_16_blocks_overflow_AplsctBswkCkEgg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_AplsctBswkCkEgg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jtFtADjqFyogvlv subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jtFtADjqFyogvlv .L_small_initial_partial_block_jtFtADjqFyogvlv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jtFtADjqFyogvlv: orq %r8,%r8 je .L_after_reduction_jtFtADjqFyogvlv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jtFtADjqFyogvlv: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_10_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_sGofikfdvCsyufv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_sGofikfdvCsyufv .L_16_blocks_overflow_sGofikfdvCsyufv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_sGofikfdvCsyufv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tcfdrpyrpqxjGcq subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tcfdrpyrpqxjGcq .L_small_initial_partial_block_tcfdrpyrpqxjGcq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tcfdrpyrpqxjGcq: orq %r8,%r8 je .L_after_reduction_tcfdrpyrpqxjGcq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tcfdrpyrpqxjGcq: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_11_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_toAwkfvytGCcuzd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_toAwkfvytGCcuzd .L_16_blocks_overflow_toAwkfvytGCcuzd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_toAwkfvytGCcuzd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wlcDxsmFdsaDbFp subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wlcDxsmFdsaDbFp 
.L_small_initial_partial_block_wlcDxsmFdsaDbFp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wlcDxsmFdsaDbFp: orq %r8,%r8 je .L_after_reduction_wlcDxsmFdsaDbFp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wlcDxsmFdsaDbFp: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_12_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_teGFdCBFbFbgpyu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_teGFdCBFbFbgpyu .L_16_blocks_overflow_teGFdCBFbFbgpyu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_teGFdCBFbFbgpyu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hapodhDjogGiCkb subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hapodhDjogGiCkb .L_small_initial_partial_block_hapodhDjogGiCkb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hapodhDjogGiCkb: orq %r8,%r8 je .L_after_reduction_hapodhDjogGiCkb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hapodhDjogGiCkb: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_13_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_EcrGhzkACEdjiEA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_EcrGhzkACEdjiEA .L_16_blocks_overflow_EcrGhzkACEdjiEA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_EcrGhzkACEdjiEA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 
vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lgpADhokDilDmjB subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lgpADhokDilDmjB .L_small_initial_partial_block_lgpADhokDilDmjB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lgpADhokDilDmjB: orq %r8,%r8 je .L_after_reduction_lgpADhokDilDmjB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lgpADhokDilDmjB: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_14_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_vfAlEigAGAFFgAm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_vfAlEigAGAFFgAm .L_16_blocks_overflow_vfAlEigAGAFFgAm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_vfAlEigAGAFFgAm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jvziCnlsAiEavam subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq 
$0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jvziCnlsAiEavam .L_small_initial_partial_block_jvziCnlsAiEavam: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jvziCnlsAiEavam: orq %r8,%r8 je .L_after_reduction_jvziCnlsAiEavam vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jvziCnlsAiEavam: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_15_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_vDsgChtGCDEtEvr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vDsgChtGCDEtEvr .L_16_blocks_overflow_vDsgChtGCDEtEvr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 
vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vDsgChtGCDEtEvr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb 
%zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aaoEnbdnBGewaEG subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aaoEnbdnBGewaEG .L_small_initial_partial_block_aaoEnbdnBGewaEG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aaoEnbdnBGewaEG: orq %r8,%r8 je .L_after_reduction_aaoEnbdnBGewaEG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aaoEnbdnBGewaEG: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_16_jcdFbiukBEavFGE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_rGdvngzaeGtrlsf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_rGdvngzaeGtrlsf .L_16_blocks_overflow_rGdvngzaeGtrlsf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_rGdvngzaeGtrlsf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_llADlmtFjlEejxe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_llADlmtFjlEejxe: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_llADlmtFjlEejxe: jmp .L_last_blocks_done_jcdFbiukBEavFGE .L_last_num_blocks_is_0_jcdFbiukBEavFGE: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq 
$0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_jcdFbiukBEavFGE: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_tFbkipsuzBAeEGF .L_encrypt_16_blocks_tFbkipsuzBAeEGF: cmpb $240,%r15b jae .L_16_blocks_overflow_AfdGcFddyowgCfD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AfdGcFddyowgCfD .L_16_blocks_overflow_AfdGcFddyowgCfD: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AfdGcFddyowgCfD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_DkxrwjzcAFtwGmv cmpl $8,%r10d je .L_last_num_blocks_is_8_DkxrwjzcAFtwGmv jb .L_last_num_blocks_is_7_1_DkxrwjzcAFtwGmv cmpl $12,%r10d je .L_last_num_blocks_is_12_DkxrwjzcAFtwGmv jb .L_last_num_blocks_is_11_9_DkxrwjzcAFtwGmv cmpl $15,%r10d je .L_last_num_blocks_is_15_DkxrwjzcAFtwGmv ja .L_last_num_blocks_is_16_DkxrwjzcAFtwGmv cmpl $14,%r10d je .L_last_num_blocks_is_14_DkxrwjzcAFtwGmv jmp .L_last_num_blocks_is_13_DkxrwjzcAFtwGmv .L_last_num_blocks_is_11_9_DkxrwjzcAFtwGmv: cmpl $10,%r10d je .L_last_num_blocks_is_10_DkxrwjzcAFtwGmv ja 
.L_last_num_blocks_is_11_DkxrwjzcAFtwGmv jmp .L_last_num_blocks_is_9_DkxrwjzcAFtwGmv .L_last_num_blocks_is_7_1_DkxrwjzcAFtwGmv: cmpl $4,%r10d je .L_last_num_blocks_is_4_DkxrwjzcAFtwGmv jb .L_last_num_blocks_is_3_1_DkxrwjzcAFtwGmv cmpl $6,%r10d ja .L_last_num_blocks_is_7_DkxrwjzcAFtwGmv je .L_last_num_blocks_is_6_DkxrwjzcAFtwGmv jmp .L_last_num_blocks_is_5_DkxrwjzcAFtwGmv .L_last_num_blocks_is_3_1_DkxrwjzcAFtwGmv: cmpl $2,%r10d ja .L_last_num_blocks_is_3_DkxrwjzcAFtwGmv je .L_last_num_blocks_is_2_DkxrwjzcAFtwGmv .L_last_num_blocks_is_1_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_AeBdutzBBGkrhww vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_AeBdutzBBGkrhww .L_16_blocks_overflow_AeBdutzBBGkrhww: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_AeBdutzBBGkrhww: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 
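# Single-block tail: %k1 was loaded from byte64_len_to_mask_table using the remaining
# byte count, so the masked {%k1}{z} load above and the masked store below touch only
# the valid bytes of the final partial block; the byte-reflected result (vpshufb with
# the shuffle pattern held in %zmm29) then feeds the closing GHASH computation.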
vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_sanDChDEAsbDbDy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sanDChDEAsbDbDy .L_small_initial_partial_block_sanDChDEAsbDbDy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_sanDChDEAsbDbDy .L_small_initial_compute_done_sanDChDEAsbDbDy: .L_after_reduction_sanDChDEAsbDbDy: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_2_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_zEobAyflaqodkxt vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_zEobAyflaqodkxt .L_16_blocks_overflow_zEobAyflaqodkxt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_zEobAyflaqodkxt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 
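# Two-block tail, GHASH reduction: the middle Karatsuba-style product in %zmm10 is split
# into high/low 64-bit halves (vpsrldq/vpslldq) and folded into the high (%zmm14) and low
# (%zmm7) products, the lanes are XOR-collapsed down to 128 bits, and the result is
# reduced modulo the GHASH polynomial with carry-less multiplies against POLY2, all
# interleaved with the remaining AES rounds of the counter blocks.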
vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_btzmvhkGEADbAkx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_btzmvhkGEADbAkx .L_small_initial_partial_block_btzmvhkGEADbAkx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_btzmvhkGEADbAkx: orq %r8,%r8 je .L_after_reduction_btzmvhkGEADbAkx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_btzmvhkGEADbAkx: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_3_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_gcfAxoFzqodzGEz vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_gcfAxoFzqodzGEz .L_16_blocks_overflow_gcfAxoFzqodzGEz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_gcfAxoFzqodzGEz: vbroadcastf64x2 
0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EasBgBicpEglkiw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EasBgBicpEglkiw .L_small_initial_partial_block_EasBgBicpEglkiw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EasBgBicpEglkiw: orq %r8,%r8 je .L_after_reduction_EasBgBicpEglkiw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EasBgBicpEglkiw: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_4_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_manbGbfyvfFsrnl vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_manbGbfyvfFsrnl .L_16_blocks_overflow_manbGbfyvfFsrnl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_manbGbfyvfFsrnl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 
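# vpternlogq with immediate 0x96 is a three-way XOR (dst = a ^ b ^ c); it accumulates
# the 0x00/0x01/0x10/0x11 vpclmulqdq partial products above before the four 128-bit
# lanes are folded together and reduced against POLY2 below.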
vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kwtpvxfGBCymBsb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kwtpvxfGBCymBsb .L_small_initial_partial_block_kwtpvxfGBCymBsb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kwtpvxfGBCymBsb: orq %r8,%r8 je .L_after_reduction_kwtpvxfGBCymBsb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kwtpvxfGBCymBsb: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_5_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_fjElnuxjdEdFEct vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_fjElnuxjdEdFEct .L_16_blocks_overflow_fjElnuxjdEdFEct: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd 
%zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_fjElnuxjdEdFEct: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DbgCAmgvxscuoqv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DbgCAmgvxscuoqv .L_small_initial_partial_block_DbgCAmgvxscuoqv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DbgCAmgvxscuoqv: orq %r8,%r8 je .L_after_reduction_DbgCAmgvxscuoqv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DbgCAmgvxscuoqv: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_6_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_tfrvDdzahijbwmB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_tfrvDdzahijbwmB .L_16_blocks_overflow_tfrvDdzahijbwmB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_tfrvDdzahijbwmB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uEnwhzkdGwAplec subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uEnwhzkdGwAplec .L_small_initial_partial_block_uEnwhzkdGwAplec: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uEnwhzkdGwAplec: orq %r8,%r8 je .L_after_reduction_uEnwhzkdGwAplec vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uEnwhzkdGwAplec: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_7_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_qidtflFxFddzhgg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_qidtflFxFddzhgg .L_16_blocks_overflow_qidtflFxFddzhgg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_qidtflFxFddzhgg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 
112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qvicAgCgBiisxsr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qvicAgCgBiisxsr .L_small_initial_partial_block_qvicAgCgBiisxsr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qvicAgCgBiisxsr: orq %r8,%r8 je .L_after_reduction_qvicAgCgBiisxsr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qvicAgCgBiisxsr: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_8_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_luzsesiwggypeey vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_luzsesiwggypeey .L_16_blocks_overflow_luzsesiwggypeey: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_luzsesiwggypeey: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dhgyBxajscbfima subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dhgyBxajscbfima .L_small_initial_partial_block_dhgyBxajscbfima: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 
vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dhgyBxajscbfima: orq %r8,%r8 je .L_after_reduction_dhgyBxajscbfima vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dhgyBxajscbfima: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_9_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_EkueqaGdhDjCdgp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_EkueqaGdhDjCdgp .L_16_blocks_overflow_EkueqaGdhDjCdgp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_EkueqaGdhDjCdgp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq 
%xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_heqAoqbbuAkcyrx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_heqAoqbbuAkcyrx .L_small_initial_partial_block_heqAoqbbuAkcyrx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
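# Descriptive comment (editorial, hedged): the sequence below appears to be the
# GHASH reduction for this tail path. The 256-bit carry-less product (folded
# high half in %xmm0, low half in %xmm3) is reduced modulo the GCM polynomial
# via vpclmulqdq steps against the POLY2 constant just loaded into %xmm1, and
# vpternlogq with imm8 0x96 (a three-way XOR) merges the reduction terms with
# the high half into %xmm14.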
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_heqAoqbbuAkcyrx: orq %r8,%r8 je .L_after_reduction_heqAoqbbuAkcyrx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_heqAoqbbuAkcyrx: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_10_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_wvgCfboudsrmujp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_wvgCfboudsrmujp .L_16_blocks_overflow_wvgCfboudsrmujp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_wvgCfboudsrmujp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yxeqEqghwAplnqh subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yxeqEqghwAplnqh .L_small_initial_partial_block_yxeqEqghwAplnqh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yxeqEqghwAplnqh: orq %r8,%r8 je .L_after_reduction_yxeqEqghwAplnqh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yxeqEqghwAplnqh: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_11_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_cwemdvzqaqrBmvF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_cwemdvzqaqrBmvF .L_16_blocks_overflow_cwemdvzqaqrBmvF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_cwemdvzqaqrBmvF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 
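# Descriptive comment (editorial, hedged): in this block the AES-CTR rounds
# (vaesenc on the counter blocks in %zmm0/%zmm3/%zmm4) appear to be interleaved
# with GHASH partial products: vpclmulqdq multiplies previously buffered blocks
# by precomputed powers of H, both read from the stack, and vpternlogq with
# imm8 0x96 (a three-way XOR) folds the partial products into the running
# accumulators.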
vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tngolGfEmxmwAAg subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tngolGfEmxmwAAg .L_small_initial_partial_block_tngolGfEmxmwAAg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 
256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tngolGfEmxmwAAg: orq %r8,%r8 je .L_after_reduction_tngolGfEmxmwAAg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tngolGfEmxmwAAg: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_12_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_viscCxhaitpgcDa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_viscCxhaitpgcDa .L_16_blocks_overflow_viscCxhaitpgcDa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_viscCxhaitpgcDa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 
128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AEGqAevCpluaCEe subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AEGqAevCpluaCEe .L_small_initial_partial_block_AEGqAevCpluaCEe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AEGqAevCpluaCEe: orq %r8,%r8 je .L_after_reduction_AEGqAevCpluaCEe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AEGqAevCpluaCEe: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_13_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_aswqypGGFyocuvD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_aswqypGGFyocuvD .L_16_blocks_overflow_aswqypGGFyocuvD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_aswqypGGFyocuvD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ddibpDBalvcbdjr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ddibpDBalvcbdjr .L_small_initial_partial_block_ddibpDBalvcbdjr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ddibpDBalvcbdjr: orq %r8,%r8 je .L_after_reduction_ddibpDBalvcbdjr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ddibpDBalvcbdjr: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_14_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_uDoedupEeCpfBar vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_uDoedupEeCpfBar .L_16_blocks_overflow_uDoedupEeCpfBar: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb 
%zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_uDoedupEeCpfBar: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AilxjDdBvvoizqE subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AilxjDdBvvoizqE .L_small_initial_partial_block_AilxjDdBvvoizqE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq 
%zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AilxjDdBvvoizqE: orq %r8,%r8 je .L_after_reduction_AilxjDdBvvoizqE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AilxjDdBvvoizqE: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_15_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_qsiCcemvFCbgltw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_qsiCcemvFCbgltw .L_16_blocks_overflow_qsiCcemvFCbgltw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_qsiCcemvFCbgltw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uvFingxredipaxs subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uvFingxredipaxs .L_small_initial_partial_block_uvFingxredipaxs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uvFingxredipaxs: orq %r8,%r8 je .L_after_reduction_uvFingxredipaxs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uvFingxredipaxs: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_16_DkxrwjzcAFtwGmv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_pAbgwDdgnghCfey vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pAbgwDdgnghCfey .L_16_blocks_overflow_pAbgwDdgnghCfey: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_pAbgwDdgnghCfey: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 
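/*
 * 16-block tail: the ciphertext just stored above is byte-reflected with the
 * shuffle mask in %zmm29 into %zmm17/%zmm19/%zmm20/%zmm21 so it can be folded
 * into the GHASH accumulator below using the precomputed hash-key powers kept
 * in the context at 112(%rsi) through 336(%rsi).
 */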
vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_fFkawEbFoBxjEyl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fFkawEbFoBxjEyl: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fFkawEbFoBxjEyl: jmp .L_last_blocks_done_DkxrwjzcAFtwGmv .L_last_num_blocks_is_0_DkxrwjzcAFtwGmv: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq 
%xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_DkxrwjzcAFtwGmv: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_tFbkipsuzBAeEGF .L_message_below_32_blocks_tFbkipsuzBAeEGF: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_lpEjyDrFbrgBuyj vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_lpEjyDrFbrgBuyj: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_wmGtzaxjkAduAzk cmpl $8,%r10d je .L_last_num_blocks_is_8_wmGtzaxjkAduAzk jb .L_last_num_blocks_is_7_1_wmGtzaxjkAduAzk cmpl $12,%r10d je .L_last_num_blocks_is_12_wmGtzaxjkAduAzk jb .L_last_num_blocks_is_11_9_wmGtzaxjkAduAzk cmpl $15,%r10d je .L_last_num_blocks_is_15_wmGtzaxjkAduAzk ja .L_last_num_blocks_is_16_wmGtzaxjkAduAzk cmpl $14,%r10d je .L_last_num_blocks_is_14_wmGtzaxjkAduAzk jmp .L_last_num_blocks_is_13_wmGtzaxjkAduAzk .L_last_num_blocks_is_11_9_wmGtzaxjkAduAzk: cmpl $10,%r10d je .L_last_num_blocks_is_10_wmGtzaxjkAduAzk ja .L_last_num_blocks_is_11_wmGtzaxjkAduAzk jmp 
.L_last_num_blocks_is_9_wmGtzaxjkAduAzk .L_last_num_blocks_is_7_1_wmGtzaxjkAduAzk: cmpl $4,%r10d je .L_last_num_blocks_is_4_wmGtzaxjkAduAzk jb .L_last_num_blocks_is_3_1_wmGtzaxjkAduAzk cmpl $6,%r10d ja .L_last_num_blocks_is_7_wmGtzaxjkAduAzk je .L_last_num_blocks_is_6_wmGtzaxjkAduAzk jmp .L_last_num_blocks_is_5_wmGtzaxjkAduAzk .L_last_num_blocks_is_3_1_wmGtzaxjkAduAzk: cmpl $2,%r10d ja .L_last_num_blocks_is_3_wmGtzaxjkAduAzk je .L_last_num_blocks_is_2_wmGtzaxjkAduAzk .L_last_num_blocks_is_1_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_zAppBdlpFnqjcjn vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_zAppBdlpFnqjcjn .L_16_blocks_overflow_zAppBdlpFnqjcjn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_zAppBdlpFnqjcjn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_ohletviGGDnsqsh subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ohletviGGDnsqsh .L_small_initial_partial_block_ohletviGGDnsqsh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_ohletviGGDnsqsh .L_small_initial_compute_done_ohletviGGDnsqsh: .L_after_reduction_ohletviGGDnsqsh: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_2_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_bApGhpvksEbgnlq vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_bApGhpvksEbgnlq .L_16_blocks_overflow_bApGhpvksEbgnlq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_bApGhpvksEbgnlq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 
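/*
 * Remaining AES rounds for the tail counter block, with round keys broadcast
 * from the schedule at (%rdi).  The last key is taken from 192(%rdi), i.e.
 * 13 round keys in total, so this looks like the 192-bit-key (12-round)
 * variant of the routine.
 */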
vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_atfqpoawbrCaGCo subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_atfqpoawbrCaGCo .L_small_initial_partial_block_atfqpoawbrCaGCo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_atfqpoawbrCaGCo: orq %r8,%r8 je .L_after_reduction_atfqpoawbrCaGCo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_atfqpoawbrCaGCo: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_3_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_ngmcavmrDqtqduc vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_ngmcavmrDqtqduc .L_16_blocks_overflow_ngmcavmrDqtqduc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_ngmcavmrDqtqduc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 
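/*
 * As in the other tail paths, the AES rounds for the last counter blocks are
 * interleaved with vpclmulqdq multiplies that fold the previously saved
 * ciphertext blocks (staged at 768(%rsp) onward) by the hash-key powers at
 * (%rsp,%rbx,1) into the running GHASH sums in %zmm24/%zmm25/%zmm26,
 * presumably to overlap the multiplier latency with the vaesenc rounds.
 */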
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EgjBqgvkBgauzsF subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EgjBqgvkBgauzsF .L_small_initial_partial_block_EgjBqgvkBgauzsF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EgjBqgvkBgauzsF: orq %r8,%r8 je .L_after_reduction_EgjBqgvkBgauzsF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EgjBqgvkBgauzsF: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_4_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_oDoDxdeeEEpoaof vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_oDoDxdeeEEpoaof .L_16_blocks_overflow_oDoDxdeeEEpoaof: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_oDoDxdeeEEpoaof: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_akFyBqpssGEhllv subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
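/*
 * Horizontal reduction of the GHASH products: the 512-bit partial sums are
 * XOR-folded down to 128 bits, then reduced modulo the GHASH polynomial
 * x^128 + x^7 + x^2 + x + 1 via the two carry-less multiplies against the
 * POLY2 constant.
 */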
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_akFyBqpssGEhllv .L_small_initial_partial_block_akFyBqpssGEhllv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_akFyBqpssGEhllv: orq %r8,%r8 je .L_after_reduction_akFyBqpssGEhllv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_akFyBqpssGEhllv: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_5_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_vwvElrjpjpxAvis vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_vwvElrjpjpxAvis .L_16_blocks_overflow_vwvElrjpjpxAvis: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_vwvElrjpjpxAvis: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DFFzfAbyBGFnoDn subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DFFzfAbyBGFnoDn .L_small_initial_partial_block_DFFzfAbyBGFnoDn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DFFzfAbyBGFnoDn: orq %r8,%r8 je .L_after_reduction_DFFzfAbyBGFnoDn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DFFzfAbyBGFnoDn: jmp 
.L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_6_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_vyDvhDFpixkDdnk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_vyDvhDFpixkDdnk .L_16_blocks_overflow_vyDvhDFpixkDdnk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_vyDvhDFpixkDdnk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FEocggExrFlAoic subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq 
$0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FEocggExrFlAoic .L_small_initial_partial_block_FEocggExrFlAoic: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FEocggExrFlAoic: orq %r8,%r8 je .L_after_reduction_FEocggExrFlAoic vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FEocggExrFlAoic: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_7_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_fvtxctukrBFoshm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_fvtxctukrBFoshm .L_16_blocks_overflow_fvtxctukrBFoshm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_fvtxctukrBFoshm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zsgnBgnADqqaFdG subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zsgnBgnADqqaFdG .L_small_initial_partial_block_zsgnBgnADqqaFdG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq 
$0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zsgnBgnADqqaFdG: orq %r8,%r8 je .L_after_reduction_zsgnBgnADqqaFdG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zsgnBgnADqqaFdG: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_8_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_ACyFnxEijEcdofC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ACyFnxEijEcdofC .L_16_blocks_overflow_ACyFnxEijEcdofC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ACyFnxEijEcdofC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pinsyEqvsAdoiak subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pinsyEqvsAdoiak .L_small_initial_partial_block_pinsyEqvsAdoiak: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pinsyEqvsAdoiak: orq %r8,%r8 je .L_after_reduction_pinsyEqvsAdoiak vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pinsyEqvsAdoiak: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_9_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax 
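/*
 * 9-block tail: %r8 holds the residual byte count.  The first two 64-byte
 * groups are loaded and stored unmasked, so 128 is subtracted before indexing
 * byte64_len_to_mask_table, which supplies the %k1 mask used for the final
 * partial register load and store.
 */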
kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_AhlgEzovddtvDon vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_AhlgEzovddtvDon .L_16_blocks_overflow_AhlgEzovddtvDon: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_AhlgEzovddtvDon: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb 
%zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dgkfebGqcuDCjgt subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dgkfebGqcuDCjgt .L_small_initial_partial_block_dgkfebGqcuDCjgt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dgkfebGqcuDCjgt: orq %r8,%r8 je .L_after_reduction_dgkfebGqcuDCjgt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dgkfebGqcuDCjgt: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_10_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_AcoEnlwuyyjhDuq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_AcoEnlwuyyjhDuq .L_16_blocks_overflow_AcoEnlwuyyjhDuq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_AcoEnlwuyyjhDuq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_upsmGyaxeoyuGwq subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_upsmGyaxeoyuGwq .L_small_initial_partial_block_upsmGyaxeoyuGwq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_upsmGyaxeoyuGwq: orq %r8,%r8 je .L_after_reduction_upsmGyaxeoyuGwq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_upsmGyaxeoyuGwq: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_11_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_coDokyrbzujjnFG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_coDokyrbzujjnFG .L_16_blocks_overflow_coDokyrbzujjnFG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_coDokyrbzujjnFG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 
0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dtFFjiEElouyrlF subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 
vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dtFFjiEElouyrlF .L_small_initial_partial_block_dtFFjiEElouyrlF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dtFFjiEElouyrlF: orq %r8,%r8 je .L_after_reduction_dtFFjiEElouyrlF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dtFFjiEElouyrlF: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_12_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_uvhijsplaEEmlke vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_uvhijsplaEEmlke .L_16_blocks_overflow_uvhijsplaEEmlke: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_uvhijsplaEEmlke: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 
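/* As in the other tail sizes, the AES-CTR rounds on the counter
   blocks (round keys broadcast from the key schedule at (%rdi)) are
   interleaved with GHASH carry-less multiplies (vpclmulqdq) of data
   and hash-key powers kept on the stack, accumulated with
   vpternlogq. */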
vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sArmCAuDwnDnahw subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sArmCAuDwnDnahw .L_small_initial_partial_block_sArmCAuDwnDnahw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sArmCAuDwnDnahw: orq %r8,%r8 je .L_after_reduction_sArmCAuDwnDnahw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sArmCAuDwnDnahw: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_13_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_dCqAGwyhtFDDhuf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_dCqAGwyhtFDDhuf .L_16_blocks_overflow_dCqAGwyhtFDDhuf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_dCqAGwyhtFDDhuf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AoFriGggjmCqdFe subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AoFriGggjmCqdFe .L_small_initial_partial_block_AoFriGggjmCqdFe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AoFriGggjmCqdFe: orq %r8,%r8 je .L_after_reduction_AoFriGggjmCqdFe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AoFriGggjmCqdFe: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_14_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_eymtigzEympdfbq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_eymtigzEympdfbq .L_16_blocks_overflow_eymtigzEympdfbq: 
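/* Counter-overflow path: the low counter byte would carry, so the
   counter blocks are byte-shuffled with SHUF_MASK (%zmm29),
   incremented via the ddq_add_1234/ddq_add_4444 constants, and
   shuffled back before encryption. */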
vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_eymtigzEympdfbq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq 
%ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_psAhdEAgnjgwhnp subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_psAhdEAgnjgwhnp .L_small_initial_partial_block_psAhdEAgnjgwhnp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
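/* With the POLY2 constant in %xmm1, the vpclmulqdq/vpternlogq
   sequence below folds the 256-bit GHASH product in %xmm0:%xmm3 back
   to 128 bits modulo the GCM polynomial, leaving the updated hash in
   %xmm14. */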
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_psAhdEAgnjgwhnp: orq %r8,%r8 je .L_after_reduction_psAhdEAgnjgwhnp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_psAhdEAgnjgwhnp: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_15_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_qGavfpFFnvaCwAd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_qGavfpFFnvaCwAd .L_16_blocks_overflow_qGavfpFFnvaCwAd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_qGavfpFFnvaCwAd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DBkpyuBbpopmDCv subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DBkpyuBbpopmDCv .L_small_initial_partial_block_DBkpyuBbpopmDCv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq 
$0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DBkpyuBbpopmDCv: orq %r8,%r8 je .L_after_reduction_DBkpyuBbpopmDCv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DBkpyuBbpopmDCv: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_16_wmGtzaxjkAduAzk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_jfFqqEmsqrheBbh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jfFqqEmsqrheBbh .L_16_blocks_overflow_jfFqqEmsqrheBbh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jfFqqEmsqrheBbh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_CEafoEfoaioCrtB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CEafoEfoaioCrtB: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CEafoEfoaioCrtB: jmp .L_last_blocks_done_wmGtzaxjkAduAzk .L_last_num_blocks_is_0_wmGtzaxjkAduAzk: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_wmGtzaxjkAduAzk: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_tFbkipsuzBAeEGF .L_message_below_equal_16_blocks_tFbkipsuzBAeEGF: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_tpcppgjkDAAGbmz jl .L_small_initial_num_blocks_is_7_1_tpcppgjkDAAGbmz cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_tpcppgjkDAAGbmz jl .L_small_initial_num_blocks_is_11_9_tpcppgjkDAAGbmz cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_tpcppgjkDAAGbmz cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_tpcppgjkDAAGbmz cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_tpcppgjkDAAGbmz jmp .L_small_initial_num_blocks_is_13_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_11_9_tpcppgjkDAAGbmz: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_tpcppgjkDAAGbmz cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_tpcppgjkDAAGbmz jmp .L_small_initial_num_blocks_is_9_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_7_1_tpcppgjkDAAGbmz: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_tpcppgjkDAAGbmz jl .L_small_initial_num_blocks_is_3_1_tpcppgjkDAAGbmz cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_tpcppgjkDAAGbmz cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_tpcppgjkDAAGbmz jmp .L_small_initial_num_blocks_is_5_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_3_1_tpcppgjkDAAGbmz: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_tpcppgjkDAAGbmz cmpq 
$2,%r12 je .L_small_initial_num_blocks_is_2_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_1_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_vkGpbehGialtrzj subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vkGpbehGialtrzj .L_small_initial_partial_block_vkGpbehGialtrzj: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_vkGpbehGialtrzj .L_small_initial_compute_done_vkGpbehGialtrzj: .L_after_reduction_vkGpbehGialtrzj: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_2_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 
176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yrCuttqEucBxwFi subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yrCuttqEucBxwFi .L_small_initial_partial_block_yrCuttqEucBxwFi: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yrCuttqEucBxwFi: orq %r8,%r8 je .L_after_reduction_yrCuttqEucBxwFi vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yrCuttqEucBxwFi: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_3_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 
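/* Store the encrypted blocks to the output through the k1 byte mask, zero the unused lanes, and byte-reflect them (SHUF_MASK) for the GHASH update that follows. */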
movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kgsCrgatEoGephk subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kgsCrgatEoGephk .L_small_initial_partial_block_kgsCrgatEoGephk: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kgsCrgatEoGephk: orq %r8,%r8 je .L_after_reduction_kgsCrgatEoGephk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_kgsCrgatEoGephk: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_4_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 
$3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_flxrhfiogcrnqye subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_flxrhfiogcrnqye .L_small_initial_partial_block_flxrhfiogcrnqye: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_flxrhfiogcrnqye: orq %r8,%r8 je .L_after_reduction_flxrhfiogcrnqye vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_flxrhfiogcrnqye: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_5_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 
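/* Remaining AES rounds on the counter blocks; the finished keystream is XORed with the loaded input blocks below. */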
vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gFzmwxijGDfbEEt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gFzmwxijGDfbEEt .L_small_initial_partial_block_gFzmwxijGDfbEEt: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gFzmwxijGDfbEEt: orq %r8,%r8 je .L_after_reduction_gFzmwxijGDfbEEt vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_gFzmwxijGDfbEEt: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_6_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 
48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ywvaiFFsGziikok subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ywvaiFFsGziikok .L_small_initial_partial_block_ywvaiFFsGziikok: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ywvaiFFsGziikok: orq %r8,%r8 je .L_after_reduction_ywvaiFFsGziikok vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ywvaiFFsGziikok: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_7_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vjjxFhBDbbgteCx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vjjxFhBDbbgteCx .L_small_initial_partial_block_vjjxFhBDbbgteCx: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq 
$0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vjjxFhBDbbgteCx: orq %r8,%r8 je .L_after_reduction_vjjxFhBDbbgteCx vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_vjjxFhBDbbgteCx: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_8_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jvbFniEeBiBFBmv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq 
$0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jvbFniEeBiBFBmv .L_small_initial_partial_block_jvbFniEeBiBFBmv: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jvbFniEeBiBFBmv: orq %r8,%r8 je .L_after_reduction_jvbFniEeBiBFBmv vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jvbFniEeBiBFBmv: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_9_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zyfCoCjsyFFnpwn subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zyfCoCjsyFFnpwn .L_small_initial_partial_block_zyfCoCjsyFFnpwn: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zyfCoCjsyFFnpwn: orq %r8,%r8 je .L_after_reduction_zyfCoCjsyFFnpwn vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_zyfCoCjsyFFnpwn: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_10_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GlGwjupayCEmAmk subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 
vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GlGwjupayCEmAmk .L_small_initial_partial_block_GlGwjupayCEmAmk: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GlGwjupayCEmAmk: orq %r8,%r8 je .L_after_reduction_GlGwjupayCEmAmk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_GlGwjupayCEmAmk: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_11_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AedaxoBdGfervsb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AedaxoBdGfervsb .L_small_initial_partial_block_AedaxoBdGfervsb: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq 
$0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AedaxoBdGfervsb: orq %r8,%r8 je .L_after_reduction_AedaxoBdGfervsb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AedaxoBdGfervsb: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_12_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zfkGparhhvDqahn subq $16,%r8 
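/* Full-block case: record that no partial block remains (store 0 through %rdx), fold the running GHASH value in xmm14 into the first block, and multiply the byte-reflected blocks by the matching precomputed hash-key powers from the htable. */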
movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zfkGparhhvDqahn .L_small_initial_partial_block_zfkGparhhvDqahn: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zfkGparhhvDqahn: orq %r8,%r8 je .L_after_reduction_zfkGparhhvDqahn vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_zfkGparhhvDqahn: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_13_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb 
%zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uDsrwxuwAvaluno subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 
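/* Combine the partial products, fold all lanes down to 128 bits, and reduce modulo the GHASH polynomial using the POLY2 constant, leaving the updated hash in xmm14. */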
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uDsrwxuwAvaluno .L_small_initial_partial_block_uDsrwxuwAvaluno: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uDsrwxuwAvaluno: orq %r8,%r8 je .L_after_reduction_uDsrwxuwAvaluno vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_uDsrwxuwAvaluno: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_14_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc 
%zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_awnsCplrcfgEbDA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_awnsCplrcfgEbDA .L_small_initial_partial_block_awnsCplrcfgEbDA: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 
144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_awnsCplrcfgEbDA: orq %r8,%r8 je .L_after_reduction_awnsCplrcfgEbDA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_awnsCplrcfgEbDA: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_15_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 
vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hgEBfdDtdFvGqjb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hgEBfdDtdFvGqjb .L_small_initial_partial_block_hgEBfdDtdFvGqjb: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq 
$0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hgEBfdDtdFvGqjb: orq %r8,%r8 je .L_after_reduction_hgEBfdDtdFvGqjb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_hgEBfdDtdFvGqjb: jmp .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz .L_small_initial_num_blocks_is_16_tpcppgjkDAAGbmz: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_mbufndcrlyapBCF: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mbufndcrlyapBCF: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mbufndcrlyapBCF: .L_small_initial_blocks_encrypted_tpcppgjkDAAGbmz: .L_ghash_done_tFbkipsuzBAeEGF: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_tFbkipsuzBAeEGF: jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_256_avx512: orq %r8,%r8 je .L_enc_dec_done_eawnuBpGmxcBoDC xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_yomlCiqlqyhGbxA movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_yomlCiqlqyhGbxA subq %r13,%r12 .L_no_extra_mask_yomlCiqlqyhGbxA: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_yomlCiqlqyhGbxA vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 
vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_yomlCiqlqyhGbxA .L_partial_incomplete_yomlCiqlqyhGbxA: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_yomlCiqlqyhGbxA: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_yomlCiqlqyhGbxA: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_eawnuBpGmxcBoDC cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_eawnuBpGmxcBoDC vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_iqGewgDgqvuhkra vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_iqGewgDgqvuhkra .L_next_16_overflow_iqGewgDgqvuhkra: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_iqGewgDgqvuhkra: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc 
%zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_alwniGiGuuwbdou vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_alwniGiGuuwbdou: cmpq $512,%r8 jb .L_message_below_32_blocks_eawnuBpGmxcBoDC cmpb $240,%r15b jae .L_next_16_overflow_wkhDhbijnuGGCmD vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_wkhDhbijnuGGCmD .L_next_16_overflow_wkhDhbijnuGGCmD: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_wkhDhbijnuGGCmD: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 
144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_xuEcimfukbaBqDu vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 
vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_xuEcimfukbaBqDu: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_eawnuBpGmxcBoDC .L_encrypt_big_nblocks_eawnuBpGmxcBoDC: cmpb $240,%r15b jae .L_16_blocks_overflow_hsjyfxApibhdaao vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hsjyfxApibhdaao .L_16_blocks_overflow_hsjyfxApibhdaao: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hsjyfxApibhdaao: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_FyafAtAzhgGauwk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_FyafAtAzhgGauwk .L_16_blocks_overflow_FyafAtAzhgGauwk: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_FyafAtAzhgGauwk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_mshygnywvbAbxuk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_mshygnywvbAbxuk .L_16_blocks_overflow_mshygnywvbAbxuk: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_mshygnywvbAbxuk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_eawnuBpGmxcBoDC .L_no_more_big_nblocks_eawnuBpGmxcBoDC: cmpq $512,%r8 jae .L_encrypt_32_blocks_eawnuBpGmxcBoDC cmpq $256,%r8 jae .L_encrypt_16_blocks_eawnuBpGmxcBoDC .L_encrypt_0_blocks_ghash_32_eawnuBpGmxcBoDC: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CAikcjdGDugFfth cmpl $8,%r10d je .L_last_num_blocks_is_8_CAikcjdGDugFfth jb .L_last_num_blocks_is_7_1_CAikcjdGDugFfth cmpl $12,%r10d je .L_last_num_blocks_is_12_CAikcjdGDugFfth jb .L_last_num_blocks_is_11_9_CAikcjdGDugFfth cmpl $15,%r10d je .L_last_num_blocks_is_15_CAikcjdGDugFfth ja .L_last_num_blocks_is_16_CAikcjdGDugFfth cmpl $14,%r10d je .L_last_num_blocks_is_14_CAikcjdGDugFfth jmp .L_last_num_blocks_is_13_CAikcjdGDugFfth .L_last_num_blocks_is_11_9_CAikcjdGDugFfth: cmpl $10,%r10d je .L_last_num_blocks_is_10_CAikcjdGDugFfth ja .L_last_num_blocks_is_11_CAikcjdGDugFfth jmp .L_last_num_blocks_is_9_CAikcjdGDugFfth .L_last_num_blocks_is_7_1_CAikcjdGDugFfth: cmpl $4,%r10d je .L_last_num_blocks_is_4_CAikcjdGDugFfth jb .L_last_num_blocks_is_3_1_CAikcjdGDugFfth cmpl $6,%r10d ja .L_last_num_blocks_is_7_CAikcjdGDugFfth je .L_last_num_blocks_is_6_CAikcjdGDugFfth jmp .L_last_num_blocks_is_5_CAikcjdGDugFfth .L_last_num_blocks_is_3_1_CAikcjdGDugFfth: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CAikcjdGDugFfth je .L_last_num_blocks_is_2_CAikcjdGDugFfth .L_last_num_blocks_is_1_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_xFvljgxvqrrjiEx vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_xFvljgxvqrrjiEx .L_16_blocks_overflow_xFvljgxvqrrjiEx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_xFvljgxvqrrjiEx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 
vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_qxurhxfinuxAakr subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qxurhxfinuxAakr .L_small_initial_partial_block_qxurhxfinuxAakr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_qxurhxfinuxAakr .L_small_initial_compute_done_qxurhxfinuxAakr: .L_after_reduction_qxurhxfinuxAakr: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_2_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_jkwkgdBwnfqtmoz vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_jkwkgdBwnfqtmoz .L_16_blocks_overflow_jkwkgdBwnfqtmoz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_jkwkgdBwnfqtmoz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FuEgfclAfodbltt subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FuEgfclAfodbltt .L_small_initial_partial_block_FuEgfclAfodbltt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FuEgfclAfodbltt: orq %r8,%r8 je .L_after_reduction_FuEgfclAfodbltt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FuEgfclAfodbltt: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_3_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_rlpicECjalEogkA vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_rlpicECjalEogkA .L_16_blocks_overflow_rlpicECjalEogkA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_rlpicECjalEogkA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CuzDDhbEvttwEEk subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CuzDDhbEvttwEEk .L_small_initial_partial_block_CuzDDhbEvttwEEk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CuzDDhbEvttwEEk: orq %r8,%r8 je .L_after_reduction_CuzDDhbEvttwEEk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CuzDDhbEvttwEEk: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_4_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_gqkAClvbnegzAmA vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_gqkAClvbnegzAmA .L_16_blocks_overflow_gqkAClvbnegzAmA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_gqkAClvbnegzAmA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 
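/* 4-block tail (cont.): fold the GHASH partial products into zmm24/zmm25/zmm26, run the remaining AES rounds through vaesenclast, XOR with the masked input and store, then reduce via POLY2 */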
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xcnzwhtrnbgDqfy subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xcnzwhtrnbgDqfy .L_small_initial_partial_block_xcnzwhtrnbgDqfy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xcnzwhtrnbgDqfy: orq %r8,%r8 je .L_after_reduction_xcnzwhtrnbgDqfy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xcnzwhtrnbgDqfy: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_5_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_FklAbbifjuDAcpD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_FklAbbifjuDAcpD .L_16_blocks_overflow_FklAbbifjuDAcpD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_FklAbbifjuDAcpD: 
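/* 5-block tail: AES-CTR on four full counter blocks (zmm0) plus one masked block (xmm3), with the AES rounds interleaved against GHASH multiplies (vpclmulqdq) over blocks held on the stack */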
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oxoctmohDgCBefA subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oxoctmohDgCBefA .L_small_initial_partial_block_oxoctmohDgCBefA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oxoctmohDgCBefA: orq %r8,%r8 je .L_after_reduction_oxoctmohDgCBefA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oxoctmohDgCBefA: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_6_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_odCCAydbBFAapzd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_odCCAydbBFAapzd .L_16_blocks_overflow_odCCAydbBFAapzd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_odCCAydbBFAapzd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 
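/* 6-block tail (cont.): load 64 bytes of input plus a masked ymm block, finish the AES rounds on zmm0/ymm3, write the masked output, then GHASH and reduce */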
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qlwikcksldoilrG subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qlwikcksldoilrG .L_small_initial_partial_block_qlwikcksldoilrG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qlwikcksldoilrG: orq %r8,%r8 je .L_after_reduction_qlwikcksldoilrG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qlwikcksldoilrG: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_7_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_mjwDlmhvzElddng vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_mjwDlmhvzElddng .L_16_blocks_overflow_mjwDlmhvzElddng: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_mjwDlmhvzElddng: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 
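/* 7-block tail (cont.): last AES rounds and vaesenclast on zmm0/zmm3, XOR with the loaded input, masked store of the final 64-byte group, then GHASH the byte-swapped result and reduce via POLY2 */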
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_unqgfDFcvabkGta subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_unqgfDFcvabkGta .L_small_initial_partial_block_unqgfDFcvabkGta: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_unqgfDFcvabkGta: orq %r8,%r8 je .L_after_reduction_unqgfDFcvabkGta vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_unqgfDFcvabkGta: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_8_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_EinBcyEEyChknsj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_EinBcyEEyChknsj .L_16_blocks_overflow_EinBcyEEyChknsj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_EinBcyEEyChknsj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ejuhaaqjamhcjqF subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq 
%zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ejuhaaqjamhcjqF .L_small_initial_partial_block_ejuhaaqjamhcjqF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ejuhaaqjamhcjqF: orq %r8,%r8 je .L_after_reduction_ejuhaaqjamhcjqF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ejuhaaqjamhcjqF: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_9_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_mhxEmCxxjyDqdDo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_mhxEmCxxjyDqdDo .L_16_blocks_overflow_mhxEmCxxjyDqdDo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_mhxEmCxxjyDqdDo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zdofzxhsAexptkx subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zdofzxhsAexptkx .L_small_initial_partial_block_zdofzxhsAexptkx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zdofzxhsAexptkx: orq %r8,%r8 je .L_after_reduction_zdofzxhsAexptkx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zdofzxhsAexptkx: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_10_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_rvskGvkumwEhhsc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_rvskGvkumwEhhsc .L_16_blocks_overflow_rvskGvkumwEhhsc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_rvskGvkumwEhhsc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gngjmGDkBquyveG subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gngjmGDkBquyveG .L_small_initial_partial_block_gngjmGDkBquyveG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gngjmGDkBquyveG: orq %r8,%r8 je .L_after_reduction_gngjmGDkBquyveG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gngjmGDkBquyveG: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_11_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_Dtnnktpbavbarsp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_Dtnnktpbavbarsp .L_16_blocks_overflow_Dtnnktpbavbarsp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_Dtnnktpbavbarsp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xfvylkhgAonGlpn subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xfvylkhgAonGlpn .L_small_initial_partial_block_xfvylkhgAonGlpn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xfvylkhgAonGlpn: orq %r8,%r8 je .L_after_reduction_xfvylkhgAonGlpn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xfvylkhgAonGlpn: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_12_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_bpklztjgEEdhFxz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_bpklztjgEEdhFxz .L_16_blocks_overflow_bpklztjgEEdhFxz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_bpklztjgEEdhFxz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 
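/* 12-block tail (cont.): remaining GHASH partial products and AES rounds on zmm0/zmm3/zmm4, vaesenclast, masked store of the third 64-byte group, then GHASH reduction */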
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dgtbwzqgvnDyDmt subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq 
%zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dgtbwzqgvnDyDmt .L_small_initial_partial_block_dgtbwzqgvnDyDmt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dgtbwzqgvnDyDmt: orq %r8,%r8 je .L_after_reduction_dgtbwzqgvnDyDmt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dgtbwzqgvnDyDmt: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_13_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_BBkhDhGlvcaehas vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_BBkhDhGlvcaehas .L_16_blocks_overflow_BBkhDhGlvcaehas: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_BBkhDhGlvcaehas: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_euhapEbhfhxemzw subq $16,%r8 movq $0,(%rdx) 
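/* No partial block remains: the 13-block tail is hashed below against the
   precomputed hash-key powers at 144/208/272/336(%rsi), then reduced into
   %xmm14 using the POLY2 constant. */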
vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_euhapEbhfhxemzw .L_small_initial_partial_block_euhapEbhfhxemzw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_euhapEbhfhxemzw: orq %r8,%r8 je .L_after_reduction_euhapEbhfhxemzw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_euhapEbhfhxemzw: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_14_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_wFmlAewyxkiABzu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_wFmlAewyxkiABzu .L_16_blocks_overflow_wFmlAewyxkiABzu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_wFmlAewyxkiABzu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xleiaowmorzhxfq subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xleiaowmorzhxfq .L_small_initial_partial_block_xleiaowmorzhxfq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xleiaowmorzhxfq: orq %r8,%r8 je .L_after_reduction_xleiaowmorzhxfq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xleiaowmorzhxfq: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_15_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_fwmFnlmCbhngvtq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_fwmFnlmCbhngvtq .L_16_blocks_overflow_fwmFnlmCbhngvtq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_fwmFnlmCbhngvtq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Cwwewmiesghaixp subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Cwwewmiesghaixp .L_small_initial_partial_block_Cwwewmiesghaixp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Cwwewmiesghaixp: orq %r8,%r8 je .L_after_reduction_Cwwewmiesghaixp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Cwwewmiesghaixp: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_16_CAikcjdGDugFfth: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_xEdGzjmGszadGFy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_xEdGzjmGszadGFy .L_16_blocks_overflow_xEdGzjmGszadGFy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_xEdGzjmGszadGFy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_fphazgGgmEuxiEi: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 
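/* Continue the 16-block tail hash: the next four byte-reflected ciphertext
   blocks are multiplied by the hash-key powers at 240(%rsi) and merged into
   the accumulators via vpternlogq before the final reduction. */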
vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fphazgGgmEuxiEi: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fphazgGgmEuxiEi: jmp .L_last_blocks_done_CAikcjdGDugFfth .L_last_num_blocks_is_0_CAikcjdGDugFfth: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_CAikcjdGDugFfth: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_eawnuBpGmxcBoDC .L_encrypt_32_blocks_eawnuBpGmxcBoDC: cmpb $240,%r15b jae .L_16_blocks_overflow_fxEfrxCahjuywkw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_fxEfrxCahjuywkw 
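/* Counter overflow path: the low counter byte would wrap within the next 16
   blocks, so byte-swap the counters, add ddq_add_1234/ddq_add_4444 as full
   32-bit lanes, and swap back before the AES rounds. */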
.L_16_blocks_overflow_fxEfrxCahjuywkw: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_fxEfrxCahjuywkw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_iwxfgjgfFyEczhg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_iwxfgjgfFyEczhg .L_16_blocks_overflow_iwxfgjgfFyEczhg: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_iwxfgjgfFyEczhg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_muvbsvrgtnhDwuC cmpl $8,%r10d je .L_last_num_blocks_is_8_muvbsvrgtnhDwuC jb .L_last_num_blocks_is_7_1_muvbsvrgtnhDwuC cmpl $12,%r10d je .L_last_num_blocks_is_12_muvbsvrgtnhDwuC jb 
.L_last_num_blocks_is_11_9_muvbsvrgtnhDwuC cmpl $15,%r10d je .L_last_num_blocks_is_15_muvbsvrgtnhDwuC ja .L_last_num_blocks_is_16_muvbsvrgtnhDwuC cmpl $14,%r10d je .L_last_num_blocks_is_14_muvbsvrgtnhDwuC jmp .L_last_num_blocks_is_13_muvbsvrgtnhDwuC .L_last_num_blocks_is_11_9_muvbsvrgtnhDwuC: cmpl $10,%r10d je .L_last_num_blocks_is_10_muvbsvrgtnhDwuC ja .L_last_num_blocks_is_11_muvbsvrgtnhDwuC jmp .L_last_num_blocks_is_9_muvbsvrgtnhDwuC .L_last_num_blocks_is_7_1_muvbsvrgtnhDwuC: cmpl $4,%r10d je .L_last_num_blocks_is_4_muvbsvrgtnhDwuC jb .L_last_num_blocks_is_3_1_muvbsvrgtnhDwuC cmpl $6,%r10d ja .L_last_num_blocks_is_7_muvbsvrgtnhDwuC je .L_last_num_blocks_is_6_muvbsvrgtnhDwuC jmp .L_last_num_blocks_is_5_muvbsvrgtnhDwuC .L_last_num_blocks_is_3_1_muvbsvrgtnhDwuC: cmpl $2,%r10d ja .L_last_num_blocks_is_3_muvbsvrgtnhDwuC je .L_last_num_blocks_is_2_muvbsvrgtnhDwuC .L_last_num_blocks_is_1_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_sCioAEgxkAkBsms vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_sCioAEgxkAkBsms .L_16_blocks_overflow_sCioAEgxkAkBsms: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_sCioAEgxkAkBsms: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 
$0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_iuEEnvAblnyuBEp subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iuEEnvAblnyuBEp .L_small_initial_partial_block_iuEEnvAblnyuBEp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_iuEEnvAblnyuBEp .L_small_initial_compute_done_iuEEnvAblnyuBEp: .L_after_reduction_iuEEnvAblnyuBEp: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_2_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_syraAlmuhpzefuz vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_syraAlmuhpzefuz .L_16_blocks_overflow_syraAlmuhpzefuz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_syraAlmuhpzefuz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 
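/* Two-block tail: mask-load the remaining input under %k1, finish the AES
   rounds on the %ymm counter pair, XOR to form the output, and hash the
   byte-reflected result into the running GHASH state. */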
vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wklxqcsAiCzEeze subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wklxqcsAiCzEeze .L_small_initial_partial_block_wklxqcsAiCzEeze: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wklxqcsAiCzEeze: orq %r8,%r8 je .L_after_reduction_wklxqcsAiCzEeze vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wklxqcsAiCzEeze: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_3_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_iccrdFDrrokpmyB vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_iccrdFDrrokpmyB .L_16_blocks_overflow_iccrdFDrrokpmyB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_iccrdFDrrokpmyB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ohaugBufhhdgdDo subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ohaugBufhhdgdDo .L_small_initial_partial_block_ohaugBufhhdgdDo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 
320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ohaugBufhhdgdDo: orq %r8,%r8 je .L_after_reduction_ohaugBufhhdgdDo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ohaugBufhhdgdDo: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_4_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_jkieEplbtgwkEgk vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_jkieEplbtgwkEgk .L_16_blocks_overflow_jkieEplbtgwkEgk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_jkieEplbtgwkEgk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_omkzepGnFhlDsok subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_omkzepGnFhlDsok .L_small_initial_partial_block_omkzepGnFhlDsok: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_omkzepGnFhlDsok: orq %r8,%r8 je .L_after_reduction_omkzepGnFhlDsok vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_omkzepGnFhlDsok: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_5_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_vtnqanBpwpcCkvb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_vtnqanBpwpcCkvb .L_16_blocks_overflow_vtnqanBpwpcCkvb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_vtnqanBpwpcCkvb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DiateEzAgclciak subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DiateEzAgclciak .L_small_initial_partial_block_DiateEzAgclciak: movq %r8,(%rdx) 
vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DiateEzAgclciak: orq %r8,%r8 je .L_after_reduction_DiateEzAgclciak vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DiateEzAgclciak: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_6_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_oakjAwsnClAznod vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_oakjAwsnClAznod .L_16_blocks_overflow_oakjAwsnClAznod: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_oakjAwsnClAznod: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oqCwqiEfmwxEduu subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oqCwqiEfmwxEduu .L_small_initial_partial_block_oqCwqiEfmwxEduu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oqCwqiEfmwxEduu: orq %r8,%r8 je .L_after_reduction_oqCwqiEfmwxEduu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oqCwqiEfmwxEduu: jmp .L_last_blocks_done_muvbsvrgtnhDwuC 
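/*
 * The .L_last_num_blocks_is_N_* branches that follow repeat the pattern of the
 * preceding small-count cases (a reader-added summary of the generated code,
 * not part of the original tool output): up to N counter blocks are formed,
 * falling back to a byte-swapped add via the ddq_add_1234/ddq_add_4444
 * constants and the %zmm29 shuffle mask when the low counter byte would wrap;
 * the blocks are encrypted with the round keys broadcast from 0(%rdi) through
 * 224(%rdi); the trailing input and output bytes are loaded and stored through
 * the %k1 byte mask taken from byte64_len_to_mask_table; and the shuffled
 * ciphertext is folded into the GHASH accumulator with the hash-key powers
 * stored at (%rsi), ending in a POLY2 reduction into %xmm14.  The
 * .L_small_initial_partial_block_* paths additionally record the remaining
 * byte count at (%rdx) and save the last counter block (%xmm11) at 16(%rsi).
 */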
.L_last_num_blocks_is_7_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_lhrubptnEwwxvoi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_lhrubptnEwwxvoi .L_16_blocks_overflow_lhrubptnEwwxvoi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_lhrubptnEwwxvoi: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lyGDbaegdAnFgEy subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lyGDbaegdAnFgEy .L_small_initial_partial_block_lyGDbaegdAnFgEy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lyGDbaegdAnFgEy: orq %r8,%r8 je .L_after_reduction_lyGDbaegdAnFgEy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lyGDbaegdAnFgEy: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_8_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_umvkbciEsdgFrgg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_umvkbciEsdgFrgg .L_16_blocks_overflow_umvkbciEsdgFrgg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_umvkbciEsdgFrgg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ogfGBxxhhoalgtB subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_ogfGBxxhhoalgtB .L_small_initial_partial_block_ogfGBxxhhoalgtB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ogfGBxxhhoalgtB: orq %r8,%r8 je .L_after_reduction_ogfGBxxhhoalgtB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ogfGBxxhhoalgtB: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_9_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_wFkatvuEtupbkGb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_wFkatvuEtupbkGb .L_16_blocks_overflow_wFkatvuEtupbkGb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_wFkatvuEtupbkGb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wkiizpjcpbzfFyj subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wkiizpjcpbzfFyj .L_small_initial_partial_block_wkiizpjcpbzfFyj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wkiizpjcpbzfFyj: orq %r8,%r8 je .L_after_reduction_wkiizpjcpbzfFyj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wkiizpjcpbzfFyj: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_10_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_ircelvtBaeuiwvC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_ircelvtBaeuiwvC .L_16_blocks_overflow_ircelvtBaeuiwvC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_ircelvtBaeuiwvC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pDtuuFvFlvjvrCz subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pDtuuFvFlvjvrCz .L_small_initial_partial_block_pDtuuFvFlvjvrCz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pDtuuFvFlvjvrCz: orq %r8,%r8 je .L_after_reduction_pDtuuFvFlvjvrCz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pDtuuFvFlvjvrCz: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_11_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_GozdsctAidzEqxd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_GozdsctAidzEqxd .L_16_blocks_overflow_GozdsctAidzEqxd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_GozdsctAidzEqxd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 
vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yrocgFvryFBiech subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yrocgFvryFBiech .L_small_initial_partial_block_yrocgFvryFBiech: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yrocgFvryFBiech: orq %r8,%r8 je .L_after_reduction_yrocgFvryFBiech vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yrocgFvryFBiech: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_12_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_kgvcyifhjuAglsm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_kgvcyifhjuAglsm .L_16_blocks_overflow_kgvcyifhjuAglsm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_kgvcyifhjuAglsm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oclBtelgDoBblti subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oclBtelgDoBblti .L_small_initial_partial_block_oclBtelgDoBblti: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oclBtelgDoBblti: orq %r8,%r8 je .L_after_reduction_oclBtelgDoBblti vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oclBtelgDoBblti: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_13_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_GgsgulfrbGGFGGc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_GgsgulfrbGGFGGc .L_16_blocks_overflow_GgsgulfrbGGFGGc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_GgsgulfrbGGFGGc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bvEBvhpbxzwvDrk subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bvEBvhpbxzwvDrk .L_small_initial_partial_block_bvEBvhpbxzwvDrk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bvEBvhpbxzwvDrk: orq %r8,%r8 je .L_after_reduction_bvEBvhpbxzwvDrk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bvEBvhpbxzwvDrk: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_14_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_vejDBlGzdxbDGDE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_vejDBlGzdxbDGDE .L_16_blocks_overflow_vejDBlGzdxbDGDE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_vejDBlGzdxbDGDE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lvCGeChuoEvfnul subq $16,%r8 
movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lvCGeChuoEvfnul .L_small_initial_partial_block_lvCGeChuoEvfnul: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lvCGeChuoEvfnul: orq %r8,%r8 je .L_after_reduction_lvCGeChuoEvfnul vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lvCGeChuoEvfnul: jmp .L_last_blocks_done_muvbsvrgtnhDwuC 
.L_last_num_blocks_is_15_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_ytioEdspdkiwstn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ytioEdspdkiwstn .L_16_blocks_overflow_ytioEdspdkiwstn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ytioEdspdkiwstn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fxpoudCxsjlwBmb subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fxpoudCxsjlwBmb .L_small_initial_partial_block_fxpoudCxsjlwBmb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq 
$0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fxpoudCxsjlwBmb: orq %r8,%r8 je .L_after_reduction_fxpoudCxsjlwBmb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fxpoudCxsjlwBmb: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_16_muvbsvrgtnhDwuC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_ijwokgwDeCteCll vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ijwokgwDeCteCll .L_16_blocks_overflow_ijwokgwDeCteCll: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ijwokgwDeCteCll: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 
vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_rCCuFewyfDAEddb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rCCuFewyfDAEddb: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rCCuFewyfDAEddb: jmp .L_last_blocks_done_muvbsvrgtnhDwuC .L_last_num_blocks_is_0_muvbsvrgtnhDwuC: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_muvbsvrgtnhDwuC: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_eawnuBpGmxcBoDC .L_encrypt_16_blocks_eawnuBpGmxcBoDC: cmpb $240,%r15b jae .L_16_blocks_overflow_nAxplcgfimbFyBh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_nAxplcgfimbFyBh .L_16_blocks_overflow_nAxplcgfimbFyBh: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_nAxplcgfimbFyBh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq 
$0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_gFFyhgntvwxgCvF cmpl $8,%r10d je .L_last_num_blocks_is_8_gFFyhgntvwxgCvF jb .L_last_num_blocks_is_7_1_gFFyhgntvwxgCvF cmpl $12,%r10d je .L_last_num_blocks_is_12_gFFyhgntvwxgCvF jb .L_last_num_blocks_is_11_9_gFFyhgntvwxgCvF cmpl $15,%r10d je .L_last_num_blocks_is_15_gFFyhgntvwxgCvF ja .L_last_num_blocks_is_16_gFFyhgntvwxgCvF cmpl $14,%r10d je .L_last_num_blocks_is_14_gFFyhgntvwxgCvF jmp .L_last_num_blocks_is_13_gFFyhgntvwxgCvF .L_last_num_blocks_is_11_9_gFFyhgntvwxgCvF: cmpl $10,%r10d je .L_last_num_blocks_is_10_gFFyhgntvwxgCvF ja .L_last_num_blocks_is_11_gFFyhgntvwxgCvF jmp .L_last_num_blocks_is_9_gFFyhgntvwxgCvF .L_last_num_blocks_is_7_1_gFFyhgntvwxgCvF: cmpl $4,%r10d je .L_last_num_blocks_is_4_gFFyhgntvwxgCvF jb .L_last_num_blocks_is_3_1_gFFyhgntvwxgCvF cmpl $6,%r10d ja .L_last_num_blocks_is_7_gFFyhgntvwxgCvF je .L_last_num_blocks_is_6_gFFyhgntvwxgCvF jmp .L_last_num_blocks_is_5_gFFyhgntvwxgCvF .L_last_num_blocks_is_3_1_gFFyhgntvwxgCvF: cmpl $2,%r10d ja .L_last_num_blocks_is_3_gFFyhgntvwxgCvF je .L_last_num_blocks_is_2_gFFyhgntvwxgCvF .L_last_num_blocks_is_1_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_edqyFiqozsDenuz vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_edqyFiqozsDenuz .L_16_blocks_overflow_edqyFiqozsDenuz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_edqyFiqozsDenuz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_hxBDgFwdGwbthCy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hxBDgFwdGwbthCy .L_small_initial_partial_block_hxBDgFwdGwbthCy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_hxBDgFwdGwbthCy .L_small_initial_compute_done_hxBDgFwdGwbthCy: .L_after_reduction_hxBDgFwdGwbthCy: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_2_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_uyuBmtkqzsrxAjG vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_uyuBmtkqzsrxAjG .L_16_blocks_overflow_uyuBmtkqzsrxAjG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_uyuBmtkqzsrxAjG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DnwnjmmqBtjmtxy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DnwnjmmqBtjmtxy .L_small_initial_partial_block_DnwnjmmqBtjmtxy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DnwnjmmqBtjmtxy: orq %r8,%r8 je .L_after_reduction_DnwnjmmqBtjmtxy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DnwnjmmqBtjmtxy: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_3_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_mayxFbwAyisdwiE vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_mayxFbwAyisdwiE .L_16_blocks_overflow_mayxFbwAyisdwiE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_mayxFbwAyisdwiE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq 
%zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sFnrdciEorxGldB subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sFnrdciEorxGldB .L_small_initial_partial_block_sFnrdciEorxGldB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sFnrdciEorxGldB: orq %r8,%r8 je .L_after_reduction_sFnrdciEorxGldB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sFnrdciEorxGldB: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_4_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_cahBhluzDpDniBC vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_cahBhluzDpDniBC .L_16_blocks_overflow_cahBhluzDpDniBC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_cahBhluzDpDniBC: 
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_flBuFDkGEouCjry subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_flBuFDkGEouCjry .L_small_initial_partial_block_flBuFDkGEouCjry: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_flBuFDkGEouCjry: orq %r8,%r8 je .L_after_reduction_flBuFDkGEouCjry vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_flBuFDkGEouCjry: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_5_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_dogBbFBCkktqmfE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_dogBbFBCkktqmfE .L_16_blocks_overflow_dogBbFBCkktqmfE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_dogBbFBCkktqmfE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BcpothbedDEfeoC subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BcpothbedDEfeoC .L_small_initial_partial_block_BcpothbedDEfeoC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 
vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BcpothbedDEfeoC: orq %r8,%r8 je .L_after_reduction_BcpothbedDEfeoC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BcpothbedDEfeoC: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_6_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_oGartozfntEBpal vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_oGartozfntEBpal .L_16_blocks_overflow_oGartozfntEBpal: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_oGartozfntEBpal: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq 
%ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rwznrbbsqxwaCko subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rwznrbbsqxwaCko .L_small_initial_partial_block_rwznrbbsqxwaCko: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rwznrbbsqxwaCko: orq %r8,%r8 je .L_after_reduction_rwznrbbsqxwaCko vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rwznrbbsqxwaCko: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_7_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_EBiardhujGzcrlk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_EBiardhujGzcrlk .L_16_blocks_overflow_EBiardhujGzcrlk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_EBiardhujGzcrlk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 
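/* Remaining AES rounds for the tail counter blocks; round keys are broadcast from the expanded key at (%rdi). With keys consumed through offset 224 this appears to be the 14-round (AES-256) path of the routine. */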
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tnvletidFAfbEDF subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tnvletidFAfbEDF .L_small_initial_partial_block_tnvletidFAfbEDF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tnvletidFAfbEDF: orq %r8,%r8 je .L_after_reduction_tnvletidFAfbEDF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tnvletidFAfbEDF: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_8_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_iumqnFogzhcrGGw vpaddd %zmm28,%zmm2,%zmm0 vpaddd 
%zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_iumqnFogzhcrGGw .L_16_blocks_overflow_iumqnFogzhcrGGw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_iumqnFogzhcrGGw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq 
%r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qEzaCAhsCAiFoFG subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qEzaCAhsCAiFoFG .L_small_initial_partial_block_qEzaCAhsCAiFoFG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qEzaCAhsCAiFoFG: orq %r8,%r8 je .L_after_reduction_qEzaCAhsCAiFoFG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qEzaCAhsCAiFoFG: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_9_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_uerldGeDtdqniAd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_uerldGeDtdqniAd .L_16_blocks_overflow_uerldGeDtdqniAd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_uerldGeDtdqniAd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 
512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aaFGCaaBiGmkrxE subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aaFGCaaBiGmkrxE .L_small_initial_partial_block_aaFGCaaBiGmkrxE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aaFGCaaBiGmkrxE: orq %r8,%r8 je .L_after_reduction_aaFGCaaBiGmkrxE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aaFGCaaBiGmkrxE: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_10_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_Aozpqcpomafvkzu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp 
.L_16_blocks_ok_Aozpqcpomafvkzu .L_16_blocks_overflow_Aozpqcpomafvkzu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_Aozpqcpomafvkzu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dahhcFmAhdipFgB subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dahhcFmAhdipFgB .L_small_initial_partial_block_dahhcFmAhdipFgB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dahhcFmAhdipFgB: orq %r8,%r8 je .L_after_reduction_dahhcFmAhdipFgB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dahhcFmAhdipFgB: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_11_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_EgocqAvvFflyEjg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_EgocqAvvFflyEjg .L_16_blocks_overflow_EgocqAvvFflyEjg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_EgocqAvvFflyEjg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
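/* GHASH accumulators: %zmm14 collects the high halves and %zmm7 the low halves of the carry-less products; below they are folded down to 128 bits and reduced with POLY2, interleaved with the outstanding AES rounds. */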
vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BgCerdsyeobnbbs subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BgCerdsyeobnbbs .L_small_initial_partial_block_BgCerdsyeobnbbs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq 
%zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BgCerdsyeobnbbs: orq %r8,%r8 je .L_after_reduction_BgCerdsyeobnbbs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BgCerdsyeobnbbs: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_12_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_cydmoiBEzigfGjF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_cydmoiBEzigfGjF .L_16_blocks_overflow_cydmoiBEzigfGjF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_cydmoiBEzigfGjF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cDdypaAhkmGvFrB subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
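/* Two tail variants follow: the full-block path stores 0 at (%rdx), while the .L_small_initial_partial_block_* path records the leftover byte count at (%rdx) and saves the last counter block (%xmm11) at 16(%rsi), presumably so a later call can complete that partial block. */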
jmp .L_small_initial_compute_done_cDdypaAhkmGvFrB .L_small_initial_partial_block_cDdypaAhkmGvFrB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cDdypaAhkmGvFrB: orq %r8,%r8 je .L_after_reduction_cDdypaAhkmGvFrB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cDdypaAhkmGvFrB: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_13_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_cGnAhGixtCoyetC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_cGnAhGixtCoyetC .L_16_blocks_overflow_cGnAhGixtCoyetC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_cGnAhGixtCoyetC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 
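/* Each .L_last_num_blocks_is_N_* arm encrypts a tail of N counter blocks and multiplies the resulting ciphertext by the matching precomputed powers of the hash key held relative to (%rsi). */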
vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FeGcnwBvApiyeqj subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FeGcnwBvApiyeqj .L_small_initial_partial_block_FeGcnwBvApiyeqj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FeGcnwBvApiyeqj: orq %r8,%r8 je .L_after_reduction_FeGcnwBvApiyeqj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FeGcnwBvApiyeqj: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_14_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_iftBfEFqGGBvyjm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_iftBfEFqGGBvyjm .L_16_blocks_overflow_iftBfEFqGGBvyjm: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_iftBfEFqGGBvyjm: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oihhuqgdwBFgleb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oihhuqgdwBFgleb .L_small_initial_partial_block_oihhuqgdwBFgleb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oihhuqgdwBFgleb: orq %r8,%r8 je .L_after_reduction_oihhuqgdwBFgleb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oihhuqgdwBFgleb: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_15_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_fvupeAvimjnmGoe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_fvupeAvimjnmGoe .L_16_blocks_overflow_fvupeAvimjnmGoe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_fvupeAvimjnmGoe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rrptnxnCqernCsp subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rrptnxnCqernCsp .L_small_initial_partial_block_rrptnxnCqernCsp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rrptnxnCqernCsp: orq %r8,%r8 je .L_after_reduction_rrptnxnCqernCsp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rrptnxnCqernCsp: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_16_gFFyhgntvwxgCvF: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_wGkryszirehgiqf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wGkryszirehgiqf .L_16_blocks_overflow_wGkryszirehgiqf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wGkryszirehgiqf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ylCxcFDbnxrlyjy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ylCxcFDbnxrlyjy: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ylCxcFDbnxrlyjy: jmp .L_last_blocks_done_gFFyhgntvwxgCvF .L_last_num_blocks_is_0_gFFyhgntvwxgCvF: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 
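/* No whole blocks remain to encrypt in this path: the cached GHASH partial products are folded into the zmm24 (high), zmm25 (low) and zmm26 (middle) accumulators before the final reduction. */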
vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_gFFyhgntvwxgCvF: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_eawnuBpGmxcBoDC .L_message_below_32_blocks_eawnuBpGmxcBoDC: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_cyGhsoclCDuqust vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq 
%zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_cyGhsoclCDuqust: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_gmjFjaoGnEhAquD cmpl $8,%r10d je .L_last_num_blocks_is_8_gmjFjaoGnEhAquD jb .L_last_num_blocks_is_7_1_gmjFjaoGnEhAquD cmpl $12,%r10d je .L_last_num_blocks_is_12_gmjFjaoGnEhAquD jb .L_last_num_blocks_is_11_9_gmjFjaoGnEhAquD cmpl $15,%r10d je .L_last_num_blocks_is_15_gmjFjaoGnEhAquD ja .L_last_num_blocks_is_16_gmjFjaoGnEhAquD cmpl $14,%r10d je .L_last_num_blocks_is_14_gmjFjaoGnEhAquD jmp .L_last_num_blocks_is_13_gmjFjaoGnEhAquD .L_last_num_blocks_is_11_9_gmjFjaoGnEhAquD: cmpl $10,%r10d je .L_last_num_blocks_is_10_gmjFjaoGnEhAquD ja .L_last_num_blocks_is_11_gmjFjaoGnEhAquD jmp .L_last_num_blocks_is_9_gmjFjaoGnEhAquD .L_last_num_blocks_is_7_1_gmjFjaoGnEhAquD: cmpl $4,%r10d je .L_last_num_blocks_is_4_gmjFjaoGnEhAquD jb .L_last_num_blocks_is_3_1_gmjFjaoGnEhAquD cmpl $6,%r10d ja .L_last_num_blocks_is_7_gmjFjaoGnEhAquD je .L_last_num_blocks_is_6_gmjFjaoGnEhAquD jmp .L_last_num_blocks_is_5_gmjFjaoGnEhAquD .L_last_num_blocks_is_3_1_gmjFjaoGnEhAquD: cmpl $2,%r10d ja .L_last_num_blocks_is_3_gmjFjaoGnEhAquD je .L_last_num_blocks_is_2_gmjFjaoGnEhAquD .L_last_num_blocks_is_1_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_lmprlxqohayAaff vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_lmprlxqohayAaff .L_16_blocks_overflow_lmprlxqohayAaff: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_lmprlxqohayAaff: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq 
%zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_ycnbantiDaoGCva subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ycnbantiDaoGCva .L_small_initial_partial_block_ycnbantiDaoGCva: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_ycnbantiDaoGCva .L_small_initial_compute_done_ycnbantiDaoGCva: .L_after_reduction_ycnbantiDaoGCva: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_2_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_FmnmcFgtBcispji vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_FmnmcFgtBcispji .L_16_blocks_overflow_FmnmcFgtBcispji: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_FmnmcFgtBcispji: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AtjvciobwAfsBgo subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AtjvciobwAfsBgo .L_small_initial_partial_block_AtjvciobwAfsBgo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AtjvciobwAfsBgo: orq %r8,%r8 je .L_after_reduction_AtjvciobwAfsBgo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AtjvciobwAfsBgo: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_3_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_tgAkxvFFocitubl vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_tgAkxvFFocitubl .L_16_blocks_overflow_tgAkxvFFocitubl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_tgAkxvFFocitubl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_siwDojaimuxlcux subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_siwDojaimuxlcux .L_small_initial_partial_block_siwDojaimuxlcux: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_siwDojaimuxlcux: orq %r8,%r8 je .L_after_reduction_siwDojaimuxlcux vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_siwDojaimuxlcux: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_4_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_AaBBmAybFatffyg vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_AaBBmAybFatffyg .L_16_blocks_overflow_AaBBmAybFatffyg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_AaBBmAybFatffyg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xhaBeCiyfAeqaBf subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xhaBeCiyfAeqaBf .L_small_initial_partial_block_xhaBeCiyfAeqaBf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xhaBeCiyfAeqaBf: orq %r8,%r8 je .L_after_reduction_xhaBeCiyfAeqaBf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xhaBeCiyfAeqaBf: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_5_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_akmmkrkgrAtqDyf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_akmmkrkgrAtqDyf .L_16_blocks_overflow_akmmkrkgrAtqDyf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 
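/* Counter-overflow path: the counter blocks were byte-swapped, incremented as 32-bit integers, and are shuffled back to AES input byte order here. */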
vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_akmmkrkgrAtqDyf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xqhfeyAhltlBsyF subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xqhfeyAhltlBsyF .L_small_initial_partial_block_xqhfeyAhltlBsyF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xqhfeyAhltlBsyF: orq %r8,%r8 je .L_after_reduction_xqhfeyAhltlBsyF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xqhfeyAhltlBsyF: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_6_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_vuckCplCqacsnkw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_vuckCplCqacsnkw .L_16_blocks_overflow_vuckCplCqacsnkw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_vuckCplCqacsnkw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ruAuuqlioaFhuzd subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ruAuuqlioaFhuzd .L_small_initial_partial_block_ruAuuqlioaFhuzd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
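/* Fold the 512-bit high (zmm0) and low (zmm3) GHASH halves down to 128 bits, then reduce modulo the GCM polynomial using POLY2. */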
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ruAuuqlioaFhuzd: orq %r8,%r8 je .L_after_reduction_ruAuuqlioaFhuzd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ruAuuqlioaFhuzd: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_7_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_vxwemaBiapgApmr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_vxwemaBiapgApmr .L_16_blocks_overflow_vxwemaBiapgApmr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_vxwemaBiapgApmr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wdpAcmnbkmzzufl subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wdpAcmnbkmzzufl .L_small_initial_partial_block_wdpAcmnbkmzzufl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wdpAcmnbkmzzufl: orq %r8,%r8 je .L_after_reduction_wdpAcmnbkmzzufl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wdpAcmnbkmzzufl: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_8_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_kuexuhgEceqggje vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_kuexuhgEceqggje .L_16_blocks_overflow_kuexuhgEceqggje: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 
vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_kuexuhgEceqggje: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tvzmBcComjdtAzn subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq 
%zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tvzmBcComjdtAzn .L_small_initial_partial_block_tvzmBcComjdtAzn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tvzmBcComjdtAzn: orq %r8,%r8 je .L_after_reduction_tvzmBcComjdtAzn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tvzmBcComjdtAzn: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_9_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_npAFwfijqmcuehu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_npAFwfijqmcuehu .L_16_blocks_overflow_npAFwfijqmcuehu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_npAFwfijqmcuehu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 
vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gxddwsBBhjrmGda subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gxddwsBBhjrmGda .L_small_initial_partial_block_gxddwsBBhjrmGda: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gxddwsBBhjrmGda: orq %r8,%r8 je .L_after_reduction_gxddwsBBhjrmGda vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gxddwsBBhjrmGda: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_10_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_hvAwbmhkGhGravm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_hvAwbmhkGhGravm .L_16_blocks_overflow_hvAwbmhkGhGravm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_hvAwbmhkGhGravm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bjwDcmjtGlgmwEb subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bjwDcmjtGlgmwEb .L_small_initial_partial_block_bjwDcmjtGlgmwEb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bjwDcmjtGlgmwEb: orq %r8,%r8 je .L_after_reduction_bjwDcmjtGlgmwEb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bjwDcmjtGlgmwEb: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_11_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_BhqdCBAEnwmDwhl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_BhqdCBAEnwmDwhl .L_16_blocks_overflow_BhqdCBAEnwmDwhl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_BhqdCBAEnwmDwhl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ipuaxhAChCElalm subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 
vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ipuaxhAChCElalm .L_small_initial_partial_block_ipuaxhAChCElalm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ipuaxhAChCElalm: orq %r8,%r8 je .L_after_reduction_ipuaxhAChCElalm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ipuaxhAChCElalm: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_12_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_ckykbBijvpyDxDm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ckykbBijvpyDxDm .L_16_blocks_overflow_ckykbBijvpyDxDm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ckykbBijvpyDxDm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mkzFsudzBDhjcvh subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mkzFsudzBDhjcvh .L_small_initial_partial_block_mkzFsudzBDhjcvh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mkzFsudzBDhjcvh: orq %r8,%r8 je .L_after_reduction_mkzFsudzBDhjcvh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mkzFsudzBDhjcvh: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_13_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_DjGBFpAkClvxnAD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_DjGBFpAkClvxnAD .L_16_blocks_overflow_DjGBFpAkClvxnAD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_DjGBFpAkClvxnAD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 
48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lygCkeDknmvaExs subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lygCkeDknmvaExs .L_small_initial_partial_block_lygCkeDknmvaExs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lygCkeDknmvaExs: orq %r8,%r8 je .L_after_reduction_lygCkeDknmvaExs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lygCkeDknmvaExs: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_14_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_mxbEwfimcnwvdax vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_mxbEwfimcnwvdax .L_16_blocks_overflow_mxbEwfimcnwvdax: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd 
%zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_mxbEwfimcnwvdax: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 
vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bdGmCjdgnqqlltq subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bdGmCjdgnqqlltq .L_small_initial_partial_block_bdGmCjdgnqqlltq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bdGmCjdgnqqlltq: orq %r8,%r8 je .L_after_reduction_bdGmCjdgnqqlltq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bdGmCjdgnqqlltq: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_15_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_zgjqhDpFicvrFBk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_zgjqhDpFicvrFBk .L_16_blocks_overflow_zgjqhDpFicvrFBk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_zgjqhDpFicvrFBk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq 
%zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DiAChhgwveonFpA subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DiAChhgwveonFpA .L_small_initial_partial_block_DiAChhgwveonFpA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DiAChhgwveonFpA: orq %r8,%r8 je .L_after_reduction_DiAChhgwveonFpA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DiAChhgwveonFpA: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_16_gmjFjaoGnEhAquD: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_yyltxtltrzdqBtp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_yyltxtltrzdqBtp .L_16_blocks_overflow_yyltxtltrzdqBtp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_yyltxtltrzdqBtp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_GsrEfbqkvAdwclh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 
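/* %zmm1 lanes 0-2 now hold the low hash-key powers H^3, H^2, H^1 (top lane zeroed); they multiply the final blocks of this 16-block tail. */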
vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GsrEfbqkvAdwclh: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GsrEfbqkvAdwclh: jmp .L_last_blocks_done_gmjFjaoGnEhAquD .L_last_num_blocks_is_0_gmjFjaoGnEhAquD: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_gmjFjaoGnEhAquD: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_eawnuBpGmxcBoDC .L_message_below_equal_16_blocks_eawnuBpGmxcBoDC: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_hbqugjruGfgczBp jl .L_small_initial_num_blocks_is_7_1_hbqugjruGfgczBp cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_hbqugjruGfgczBp jl .L_small_initial_num_blocks_is_11_9_hbqugjruGfgczBp cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_hbqugjruGfgczBp cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_hbqugjruGfgczBp cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_hbqugjruGfgczBp jmp .L_small_initial_num_blocks_is_13_hbqugjruGfgczBp .L_small_initial_num_blocks_is_11_9_hbqugjruGfgczBp: 
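/* %r12 holds the remaining block count, ceil(%r8/16); the compares below narrow the 9..11 range down to the exact handler. */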
cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_hbqugjruGfgczBp cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_hbqugjruGfgczBp jmp .L_small_initial_num_blocks_is_9_hbqugjruGfgczBp .L_small_initial_num_blocks_is_7_1_hbqugjruGfgczBp: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_hbqugjruGfgczBp jl .L_small_initial_num_blocks_is_3_1_hbqugjruGfgczBp cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_hbqugjruGfgczBp cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_hbqugjruGfgczBp jmp .L_small_initial_num_blocks_is_5_hbqugjruGfgczBp .L_small_initial_num_blocks_is_3_1_hbqugjruGfgczBp: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_hbqugjruGfgczBp cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_hbqugjruGfgczBp .L_small_initial_num_blocks_is_1_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_iFmDdgrbxxlznyd subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iFmDdgrbxxlznyd .L_small_initial_partial_block_iFmDdgrbxxlznyd: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_iFmDdgrbxxlznyd .L_small_initial_compute_done_iFmDdgrbxxlznyd: .L_after_reduction_iFmDdgrbxxlznyd: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_2_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 
kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EsCbfxikCrkamtE subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EsCbfxikCrkamtE .L_small_initial_partial_block_EsCbfxikCrkamtE: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EsCbfxikCrkamtE: orq %r8,%r8 je .L_after_reduction_EsCbfxikCrkamtE vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EsCbfxikCrkamtE: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_3_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 
$2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tBEoFGBxxBysmml subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tBEoFGBxxBysmml .L_small_initial_partial_block_tBEoFGBxxBysmml: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tBEoFGBxxBysmml: orq %r8,%r8 je .L_after_reduction_tBEoFGBxxBysmml vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_tBEoFGBxxBysmml: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_4_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 
$3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dDrxftiGhnzzsCu subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dDrxftiGhnzzsCu .L_small_initial_partial_block_dDrxftiGhnzzsCu: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dDrxftiGhnzzsCu: orq %r8,%r8 je .L_after_reduction_dDrxftiGhnzzsCu vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dDrxftiGhnzzsCu: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_5_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tgluGdkfFDhsixe subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tgluGdkfFDhsixe .L_small_initial_partial_block_tgluGdkfFDhsixe: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq 
%xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tgluGdkfFDhsixe: orq %r8,%r8 je .L_after_reduction_tgluGdkfFDhsixe vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_tgluGdkfFDhsixe: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_6_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cDptiniAjeCvsaA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq 
%xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cDptiniAjeCvsaA .L_small_initial_partial_block_cDptiniAjeCvsaA: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cDptiniAjeCvsaA: orq %r8,%r8 je .L_after_reduction_cDptiniAjeCvsaA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_cDptiniAjeCvsaA: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_7_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 
$2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CkuomECEjoqBFyr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CkuomECEjoqBFyr .L_small_initial_partial_block_CkuomECEjoqBFyr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CkuomECEjoqBFyr: orq %r8,%r8 je .L_after_reduction_CkuomECEjoqBFyr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_CkuomECEjoqBFyr: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_8_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jetFsEuskrjwged subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jetFsEuskrjwged .L_small_initial_partial_block_jetFsEuskrjwged: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jetFsEuskrjwged: orq %r8,%r8 je .L_after_reduction_jetFsEuskrjwged vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jetFsEuskrjwged: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_9_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_djtvlDCcmtClCqd subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 
vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_djtvlDCcmtClCqd .L_small_initial_partial_block_djtvlDCcmtClCqd: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_djtvlDCcmtClCqd: orq %r8,%r8 je .L_after_reduction_djtvlDCcmtClCqd vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_djtvlDCcmtClCqd: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_10_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 
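/* Round keys are broadcast from the expanded schedule at (%rdi), 16 bytes per round, and applied to all counter blocks in parallel; the chain ends with vaesenclast on the key at offset 224, i.e. the 14-round AES-256 schedule. */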
vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aptugwefEgbpisD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aptugwefEgbpisD .L_small_initial_partial_block_aptugwefEgbpisD: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq 
$0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aptugwefEgbpisD: orq %r8,%r8 je .L_after_reduction_aptugwefEgbpisD vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_aptugwefEgbpisD: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_11_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq 
%zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BboqcvvuFoyragm subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BboqcvvuFoyragm .L_small_initial_partial_block_BboqcvvuFoyragm: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BboqcvvuFoyragm: orq %r8,%r8 je .L_after_reduction_BboqcvvuFoyragm vpxorq %xmm13,%xmm14,%xmm14 
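/*
 * Annotation added for readability; not produced by aes-gcm-avx512.pl.
 * Each .L_small_initial_num_blocks_is_N_* stanza in this region handles
 * a tail of N (1..16) counter blocks the same way: derive the big-endian
 * counters from %zmm2, run every AES round from the key schedule at
 * (%rdi), XOR with the (mask %k1) input at (%rcx,%r11), store the masked
 * output at (%r9,%r11), then fold the byte-swapped output blocks into the
 * GHASH accumulator %xmm14 using the precomputed hash-key powers held in
 * the context at (%rsi), finishing with the POLY2-based reduction.
 */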
.L_after_reduction_BboqcvvuFoyragm: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_12_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yzpAqvxjrjtpbge subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 
vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yzpAqvxjrjtpbge .L_small_initial_partial_block_yzpAqvxjrjtpbge: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yzpAqvxjrjtpbge: orq %r8,%r8 je .L_after_reduction_yzpAqvxjrjtpbge vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yzpAqvxjrjtpbge: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_13_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 
64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jjkyzlqDAbpoEdw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jjkyzlqDAbpoEdw .L_small_initial_partial_block_jjkyzlqDAbpoEdw: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jjkyzlqDAbpoEdw: orq %r8,%r8 je .L_after_reduction_jjkyzlqDAbpoEdw vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jjkyzlqDAbpoEdw: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_14_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GlbsvkxecbisEEg subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GlbsvkxecbisEEg .L_small_initial_partial_block_GlbsvkxecbisEEg: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq 
$0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GlbsvkxecbisEEg: orq %r8,%r8 je .L_after_reduction_GlbsvkxecbisEEg vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_GlbsvkxecbisEEg: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_15_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BFutaboihmcgqcA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BFutaboihmcgqcA .L_small_initial_partial_block_BFutaboihmcgqcA: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq 
$0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BFutaboihmcgqcA: orq %r8,%r8 je .L_after_reduction_BFutaboihmcgqcA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_BFutaboihmcgqcA: jmp .L_small_initial_blocks_encrypted_hbqugjruGfgczBp .L_small_initial_num_blocks_is_16_hbqugjruGfgczBp: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 
176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_AxxoDBglqjscnzw: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AxxoDBglqjscnzw: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AxxoDBglqjscnzw: .L_small_initial_blocks_encrypted_hbqugjruGfgczBp: .L_ghash_done_eawnuBpGmxcBoDC: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_eawnuBpGmxcBoDC: jmp .Lexit_gcm_encrypt .Lexit_gcm_encrypt: cmpq $256,%r8 jbe .Lskip_hkeys_cleanup_FwyhaGceDljchpo vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_FwyhaGceDljchpo: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp 
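/*
 * Annotation added for readability; not produced by aes-gcm-avx512.pl.
 * The %zmm0 stores above zero the hash-key copies spilled to the local
 * stack frame; the pops and .byte 0xf3,0xc3 (rep ret) that follow unwind
 * the prologue and end ossl_aes_gcm_encrypt_avx512 before the decrypt
 * entry point is defined.
 */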
popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .byte 0xf3,0xc3 .Lencrypt_seh_end: .cfi_endproc .size ossl_aes_gcm_encrypt_avx512, .-ossl_aes_gcm_encrypt_avx512 .globl ossl_aes_gcm_decrypt_avx512 .type ossl_aes_gcm_decrypt_avx512,@function .align 32 ossl_aes_gcm_decrypt_avx512: .cfi_startproc .Ldecrypt_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Ldecrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Ldecrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Ldecrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Ldecrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Ldecrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Ldecrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Ldecrypt_seh_setfp: .Ldecrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_decrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_decrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_decrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_done_brADimEeCnCcDmv xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_bsCeAyqpAAwsgvv movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_bsCeAyqpAAwsgvv subq %r13,%r12 .L_no_extra_mask_bsCeAyqpAAwsgvv: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_bsCeAyqpAAwsgvv vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_bsCeAyqpAAwsgvv .L_partial_incomplete_bsCeAyqpAAwsgvv: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_bsCeAyqpAAwsgvv: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_bsCeAyqpAAwsgvv: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_brADimEeCnCcDmv cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_brADimEeCnCcDmv vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 
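/*
 * Annotation added for readability; not produced by aes-gcm-avx512.pl.
 * Argument registers as the surrounding decrypt routine actually uses
 * them:
 *   %rdi  AES key schedule (round count at 240(%rdi), round keys from 0)
 *   %rsi  GCM context: counter block at 0, partial-block buffer at 16,
 *         GHASH state at 64, hash-key powers at 96..336
 *   %rdx  pointer to the partial-block byte count
 *   %rcx  source (ciphertext), %r8 remaining length, %r9 destination
 * Unlike the encrypt path, GHASH here is computed over the loaded
 * ciphertext (byte-swapped through SHUF_MASK), not over the output.
 */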
cmpb $240,%r15b jae .L_next_16_overflow_eghvmbEDtcnDnAu vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_eghvmbEDtcnDnAu .L_next_16_overflow_eghvmbEDtcnDnAu: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_eghvmbEDtcnDnAu: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_plwezswvdFDdDBp vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_plwezswvdFDdDBp: cmpq $512,%r8 jb .L_message_below_32_blocks_brADimEeCnCcDmv cmpb $240,%r15b jae .L_next_16_overflow_yieysttglezqCBf vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_yieysttglezqCBf .L_next_16_overflow_yieysttglezqCBf: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 
vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_yieysttglezqCBf: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_cqhgcscctsdbGkB vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq 
$0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_cqhgcscctsdbGkB: 
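/*
 * Annotation added for readability; not produced by aes-gcm-avx512.pl.
 * The carry-less multiply / POLY2-reduction chain above extends the
 * hash-key powers loaded from the context (288/224/160/96(%rsi)) into a
 * full table on the stack at 0..704(%rsp). The loop that follows consumes
 * 48 blocks (768 bytes) per iteration, interleaving the AES rounds
 * (vbroadcastf64x2 + vaesenc) with GHASH partial products (vpclmulqdq)
 * over ciphertext blocks saved from the previous pass, and folds in the
 * final reduction during the third 16-block batch.
 */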
movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_brADimEeCnCcDmv .L_encrypt_big_nblocks_brADimEeCnCcDmv: cmpb $240,%r15b jae .L_16_blocks_overflow_jeuDwtvAfvGmCgt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jeuDwtvAfvGmCgt .L_16_blocks_overflow_jeuDwtvAfvGmCgt: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jeuDwtvAfvGmCgt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 
vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_hGznvbxlbulnqGf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hGznvbxlbulnqGf .L_16_blocks_overflow_hGznvbxlbulnqGf: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hGznvbxlbulnqGf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 
vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_hikcfykasilniFs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hikcfykasilniFs .L_16_blocks_overflow_hikcfykasilniFs: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hikcfykasilniFs: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq 
$8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_brADimEeCnCcDmv .L_no_more_big_nblocks_brADimEeCnCcDmv: cmpq $512,%r8 jae .L_encrypt_32_blocks_brADimEeCnCcDmv cmpq $256,%r8 jae .L_encrypt_16_blocks_brADimEeCnCcDmv .L_encrypt_0_blocks_ghash_32_brADimEeCnCcDmv: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_xyDAiCmaAhzpydl cmpl $8,%r10d je .L_last_num_blocks_is_8_xyDAiCmaAhzpydl jb .L_last_num_blocks_is_7_1_xyDAiCmaAhzpydl cmpl $12,%r10d je .L_last_num_blocks_is_12_xyDAiCmaAhzpydl jb .L_last_num_blocks_is_11_9_xyDAiCmaAhzpydl cmpl $15,%r10d je .L_last_num_blocks_is_15_xyDAiCmaAhzpydl ja 
.L_last_num_blocks_is_16_xyDAiCmaAhzpydl cmpl $14,%r10d je .L_last_num_blocks_is_14_xyDAiCmaAhzpydl jmp .L_last_num_blocks_is_13_xyDAiCmaAhzpydl .L_last_num_blocks_is_11_9_xyDAiCmaAhzpydl: cmpl $10,%r10d je .L_last_num_blocks_is_10_xyDAiCmaAhzpydl ja .L_last_num_blocks_is_11_xyDAiCmaAhzpydl jmp .L_last_num_blocks_is_9_xyDAiCmaAhzpydl .L_last_num_blocks_is_7_1_xyDAiCmaAhzpydl: cmpl $4,%r10d je .L_last_num_blocks_is_4_xyDAiCmaAhzpydl jb .L_last_num_blocks_is_3_1_xyDAiCmaAhzpydl cmpl $6,%r10d ja .L_last_num_blocks_is_7_xyDAiCmaAhzpydl je .L_last_num_blocks_is_6_xyDAiCmaAhzpydl jmp .L_last_num_blocks_is_5_xyDAiCmaAhzpydl .L_last_num_blocks_is_3_1_xyDAiCmaAhzpydl: cmpl $2,%r10d ja .L_last_num_blocks_is_3_xyDAiCmaAhzpydl je .L_last_num_blocks_is_2_xyDAiCmaAhzpydl .L_last_num_blocks_is_1_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_fyDzBrphsGjubgG vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_fyDzBrphsGjubgG .L_16_blocks_overflow_fyDzBrphsGjubgG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_fyDzBrphsGjubgG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_vtxqFwAgrdnllzF subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq 
%zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vtxqFwAgrdnllzF .L_small_initial_partial_block_vtxqFwAgrdnllzF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_vtxqFwAgrdnllzF .L_small_initial_compute_done_vtxqFwAgrdnllzF: .L_after_reduction_vtxqFwAgrdnllzF: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_2_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_BugDrclgtxGysBC vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_BugDrclgtxGysBC .L_16_blocks_overflow_BugDrclgtxGysBC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_BugDrclgtxGysBC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 
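/*
 * Each .L_last_num_blocks_is_N handler first loads a byte mask into %k1 from
 * byte64_len_to_mask_table, indexed by the remaining length in %r8 (after
 * subtracting any whole 64-byte groups handled unmasked), so the masked
 * vmovdqu8 {%k1}{z} loads and stores touch only the bytes that belong to the
 * final, possibly partial, block.
 */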
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dwpAvxknFwdDaDi subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dwpAvxknFwdDaDi .L_small_initial_partial_block_dwpAvxknFwdDaDi: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dwpAvxknFwdDaDi: orq %r8,%r8 je .L_after_reduction_dwpAvxknFwdDaDi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dwpAvxknFwdDaDi: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_3_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_xznshBaaivCChih vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_xznshBaaivCChih .L_16_blocks_overflow_xznshBaaivCChih: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_xznshBaaivCChih: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 
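/*
 * The two exits of every tail handler differ only in bookkeeping:
 * .L_small_initial_partial_block_* records the pending byte count %r8 at
 * (%rdx) and stores %xmm11 (the final processed block) at 16(%rsi) so a later
 * call can complete it, while the full-block path clears (%rdx); both then
 * fold the accumulated products through the POLY2 reduction into %xmm14.
 */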
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ltvboeEneeszwsu subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ltvboeEneeszwsu .L_small_initial_partial_block_ltvboeEneeszwsu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ltvboeEneeszwsu: orq %r8,%r8 je .L_after_reduction_ltvboeEneeszwsu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ltvboeEneeszwsu: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_4_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_ofErewxunpEhuze vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_ofErewxunpEhuze .L_16_blocks_overflow_ofErewxunpEhuze: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_ofErewxunpEhuze: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mdwrrkghGswontC subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
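/*
 * GHASH reduction: the 256-bit carry-less product held in %xmm0 (high) and
 * %xmm3 (low) is folded back into GF(2^128) using the POLY2 constant just
 * loaded into %xmm1; vpclmulqdq $0x01 followed by vpslldq $8 folds the low
 * half, vpclmulqdq $0x00/$0x10 with 4-byte shifts completes the reduction,
 * and vpternlogq $0x96 (a three-way XOR) merges the result into %xmm14.
 */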
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mdwrrkghGswontC .L_small_initial_partial_block_mdwrrkghGswontC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mdwrrkghGswontC: orq %r8,%r8 je .L_after_reduction_mdwrrkghGswontC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mdwrrkghGswontC: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_5_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_vlFDjDvkCmipDjj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_vlFDjDvkCmipDjj .L_16_blocks_overflow_vlFDjDvkCmipDjj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_vlFDjDvkCmipDjj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 
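/*
 * Throughout these handlers each 128-bit AES round key is broadcast from the
 * key schedule at (%rdi) with vbroadcastf64x2, so one vaesenc advances four
 * counter blocks at a time, and the rounds are interleaved with the
 * vpclmulqdq multiplies that hash the byte-reflected data blocks parked on
 * the stack against the hash-key powers cached there and in the table at
 * (%rsi).
 */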
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vyyfueCAnBpziso subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vyyfueCAnBpziso .L_small_initial_partial_block_vyyfueCAnBpziso: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vyyfueCAnBpziso: orq %r8,%r8 je .L_after_reduction_vyyfueCAnBpziso vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vyyfueCAnBpziso: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_6_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae 
.L_16_blocks_overflow_swonEtcpnChuzwe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_swonEtcpnChuzwe .L_16_blocks_overflow_swonEtcpnChuzwe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_swonEtcpnChuzwe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aEryhnaxCjcvalc subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aEryhnaxCjcvalc .L_small_initial_partial_block_aEryhnaxCjcvalc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aEryhnaxCjcvalc: orq %r8,%r8 je .L_after_reduction_aEryhnaxCjcvalc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aEryhnaxCjcvalc: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_7_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_EGhejzspzceoDrz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_EGhejzspzceoDrz .L_16_blocks_overflow_EGhejzspzceoDrz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_EGhejzspzceoDrz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lcrbhrsFEemAseF subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lcrbhrsFEemAseF .L_small_initial_partial_block_lcrbhrsFEemAseF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lcrbhrsFEemAseF: orq %r8,%r8 je .L_after_reduction_lcrbhrsFEemAseF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lcrbhrsFEemAseF: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_8_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_bwyfeoBaojvbAgd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_bwyfeoBaojvbAgd .L_16_blocks_overflow_bwyfeoBaojvbAgd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_bwyfeoBaojvbAgd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb 
%zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_osycqepyfDlatEs subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_osycqepyfDlatEs .L_small_initial_partial_block_osycqepyfDlatEs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_osycqepyfDlatEs: orq %r8,%r8 je .L_after_reduction_osycqepyfDlatEs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_osycqepyfDlatEs: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_9_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_BaoGkpEpCdeyrev vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_BaoGkpEpCdeyrev .L_16_blocks_overflow_BaoGkpEpCdeyrev: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_BaoGkpEpCdeyrev: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ilsvshcinsdmttt subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 
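/*
 * For tails that are not a multiple of four blocks the trailing GHASH
 * multiplies drop to narrower vectors: whole 64-byte groups are multiplied
 * zmm-wide against the larger key powers, and the last one to three blocks
 * use ymm/xmm-wide vpclmulqdq against the low table entries around
 * 304(%rsi)..336(%rsi) before everything is merged with vpternlogq $0x96.
 */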
vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ilsvshcinsdmttt .L_small_initial_partial_block_ilsvshcinsdmttt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ilsvshcinsdmttt: orq %r8,%r8 je .L_after_reduction_ilsvshcinsdmttt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ilsvshcinsdmttt: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_10_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_sAtxBaaxwaffire vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_sAtxBaaxwaffire .L_16_blocks_overflow_sAtxBaaxwaffire: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_sAtxBaaxwaffire: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mAgwqklangGkxiD subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mAgwqklangGkxiD .L_small_initial_partial_block_mAgwqklangGkxiD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mAgwqklangGkxiD: orq %r8,%r8 je .L_after_reduction_mAgwqklangGkxiD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mAgwqklangGkxiD: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_11_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_ditvbyzmFxiaFex vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ditvbyzmFxiaFex .L_16_blocks_overflow_ditvbyzmFxiaFex: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ditvbyzmFxiaFex: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hnpDdEkCCcoeFCy subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hnpDdEkCCcoeFCy .L_small_initial_partial_block_hnpDdEkCCcoeFCy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hnpDdEkCCcoeFCy: orq %r8,%r8 je .L_after_reduction_hnpDdEkCCcoeFCy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hnpDdEkCCcoeFCy: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_12_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_iDaEpwpdhbvwFws vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_iDaEpwpdhbvwFws .L_16_blocks_overflow_iDaEpwpdhbvwFws: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_iDaEpwpdhbvwFws: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 
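/*
 * Annotation on the .L_last_num_blocks_is_N handlers in this region: each one
 * derives the last N counter blocks, runs the AES rounds (round keys broadcast
 * from 0(%rdi) through 160(%rdi), i.e. the 10-round AES-128 leg) interleaved
 * with vpclmulqdq partial products of ciphertext buffered on the stack against
 * the precomputed hash-key powers (selected via %rbx), XORs the keystream with
 * the %k1-masked input, stores the masked output, and reduces the GHASH
 * accumulators with the POLY2 constant into %xmm14.
 */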
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vFCCocfxfdGyktw subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vFCCocfxfdGyktw .L_small_initial_partial_block_vFCCocfxfdGyktw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq 
%zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vFCCocfxfdGyktw: orq %r8,%r8 je .L_after_reduction_vFCCocfxfdGyktw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vFCCocfxfdGyktw: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_13_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_ossjtlatrhiigng vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_ossjtlatrhiigng .L_16_blocks_overflow_ossjtlatrhiigng: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_ossjtlatrhiigng: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 
0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CiuBkutmcuwgEdD subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CiuBkutmcuwgEdD .L_small_initial_partial_block_CiuBkutmcuwgEdD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 
vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CiuBkutmcuwgEdD: orq %r8,%r8 je .L_after_reduction_CiuBkutmcuwgEdD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CiuBkutmcuwgEdD: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_14_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_vocABmmphunBotn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_vocABmmphunBotn .L_16_blocks_overflow_vocABmmphunBotn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_vocABmmphunBotn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 
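/*
 * In the 13- and 14-block tails the fourth counter vector is handled at
 * reduced width (%xmm5 and %ymm5 respectively); in all of these tails the
 * loads and stores of the final vector go through the byte mask in %k1, so
 * only bytes that are actually present are read or written.
 */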
vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xoGwditlthtdCzd subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xoGwditlthtdCzd .L_small_initial_partial_block_xoGwditlthtdCzd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xoGwditlthtdCzd: orq %r8,%r8 je .L_after_reduction_xoGwditlthtdCzd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xoGwditlthtdCzd: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_15_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_jbcAwazvdrBjhzu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jbcAwazvdrBjhzu .L_16_blocks_overflow_jbcAwazvdrBjhzu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jbcAwazvdrBjhzu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eohjglCqsfjlesq subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eohjglCqsfjlesq .L_small_initial_partial_block_eohjglCqsfjlesq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eohjglCqsfjlesq: orq %r8,%r8 je .L_after_reduction_eohjglCqsfjlesq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eohjglCqsfjlesq: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_16_xyDAiCmaAhzpydl: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_uatdhlpChpnBofk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_uatdhlpChpnBofk .L_16_blocks_overflow_uatdhlpChpnBofk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_uatdhlpChpnBofk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
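/*
 * Lane 3 of %zmm5 (the highest counter value of this batch) was just extracted
 * and broadcast into %zmm2, which carries the running counter; it is
 * byte-swapped back at .L_last_blocks_done_xyDAiCmaAhzpydl.
 */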
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_uvEqevkuejAoeFv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uvEqevkuejAoeFv: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uvEqevkuejAoeFv: jmp .L_last_blocks_done_xyDAiCmaAhzpydl .L_last_num_blocks_is_0_xyDAiCmaAhzpydl: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_xyDAiCmaAhzpydl: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_brADimEeCnCcDmv .L_encrypt_32_blocks_brADimEeCnCcDmv: cmpb $240,%r15b jae .L_16_blocks_overflow_brlCzGBjhaqyEcd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 
vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_brlCzGBjhaqyEcd .L_16_blocks_overflow_brlCzGBjhaqyEcd: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_brlCzGBjhaqyEcd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 
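/*
 * The 16 ciphertext blocks just written are byte-swapped with the shuffle mask
 * in %zmm29 and parked at 1280(%rsp)..1472(%rsp); they are multiplied against
 * the hash-key powers and folded into the GHASH accumulators further down
 * rather than being hashed immediately.
 */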
vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_pchieDggcEipdhz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pchieDggcEipdhz .L_16_blocks_overflow_pchieDggcEipdhz: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_pchieDggcEipdhz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast 
%zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_digsBljoDvGeopi cmpl $8,%r10d je .L_last_num_blocks_is_8_digsBljoDvGeopi jb .L_last_num_blocks_is_7_1_digsBljoDvGeopi cmpl $12,%r10d je .L_last_num_blocks_is_12_digsBljoDvGeopi jb .L_last_num_blocks_is_11_9_digsBljoDvGeopi cmpl $15,%r10d je .L_last_num_blocks_is_15_digsBljoDvGeopi ja .L_last_num_blocks_is_16_digsBljoDvGeopi cmpl $14,%r10d je .L_last_num_blocks_is_14_digsBljoDvGeopi jmp .L_last_num_blocks_is_13_digsBljoDvGeopi .L_last_num_blocks_is_11_9_digsBljoDvGeopi: cmpl $10,%r10d je .L_last_num_blocks_is_10_digsBljoDvGeopi ja .L_last_num_blocks_is_11_digsBljoDvGeopi jmp .L_last_num_blocks_is_9_digsBljoDvGeopi .L_last_num_blocks_is_7_1_digsBljoDvGeopi: cmpl $4,%r10d je .L_last_num_blocks_is_4_digsBljoDvGeopi jb .L_last_num_blocks_is_3_1_digsBljoDvGeopi cmpl $6,%r10d ja .L_last_num_blocks_is_7_digsBljoDvGeopi je .L_last_num_blocks_is_6_digsBljoDvGeopi jmp .L_last_num_blocks_is_5_digsBljoDvGeopi .L_last_num_blocks_is_3_1_digsBljoDvGeopi: cmpl $2,%r10d ja .L_last_num_blocks_is_3_digsBljoDvGeopi je .L_last_num_blocks_is_2_digsBljoDvGeopi .L_last_num_blocks_is_1_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae 
.L_16_blocks_overflow_eopubcfobBxhpzt vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_eopubcfobBxhpzt .L_16_blocks_overflow_eopubcfobBxhpzt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_eopubcfobBxhpzt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_GethbnvGqcjphdB subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GethbnvGqcjphdB .L_small_initial_partial_block_GethbnvGqcjphdB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq 
%ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_GethbnvGqcjphdB .L_small_initial_compute_done_GethbnvGqcjphdB: .L_after_reduction_GethbnvGqcjphdB: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_2_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_tpsnzcptGBjneak vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_tpsnzcptGBjneak .L_16_blocks_overflow_tpsnzcptGBjneak: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_tpsnzcptGBjneak: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xzAlvFvGbtFmqjz subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xzAlvFvGbtFmqjz .L_small_initial_partial_block_xzAlvFvGbtFmqjz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xzAlvFvGbtFmqjz: orq %r8,%r8 je .L_after_reduction_xzAlvFvGbtFmqjz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xzAlvFvGbtFmqjz: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_3_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_lirgnnkvzmitoxw vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_lirgnnkvzmitoxw .L_16_blocks_overflow_lirgnnkvzmitoxw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_lirgnnkvzmitoxw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ovClAwtFzFgwrxE subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ovClAwtFzFgwrxE .L_small_initial_partial_block_ovClAwtFzFgwrxE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ovClAwtFzFgwrxE: orq %r8,%r8 je .L_after_reduction_ovClAwtFzFgwrxE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ovClAwtFzFgwrxE: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_4_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_xgCtemAejdionch vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_xgCtemAejdionch .L_16_blocks_overflow_xgCtemAejdionch: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_xgCtemAejdionch: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 
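/*
 * The stretch below interleaves two things: AES-CTR rounds on the tail
 * counter block(s), with round keys broadcast from the expanded schedule at
 * 16*r(%rdi), and GHASH accumulation over blocks kept in the stack frame
 * (0/64/128/192(%rsp,%rbx,1) paired with 768..960(%rsp)).  The running hash
 * travels in %zmm14/%xmm14: it is folded in up front by the
 * "vpxorq 768(%rsp),%zmm14,%zmm8" and the reduced result lands back in %xmm14.
 * The vpclmulqdq $0x00/$0x01/$0x10/$0x11 quartets are the four 64x64
 * carry-less partial products of each 128-bit multiply, and vpternlogq $0x96
 * is a three-way XOR used to fold them.  Register and stack roles here are
 * inferred from the instruction pattern, not documented by the generator.
 */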
vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iEyBjAGEhdmCFpz subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iEyBjAGEhdmCFpz .L_small_initial_partial_block_iEyBjAGEhdmCFpz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iEyBjAGEhdmCFpz: orq %r8,%r8 je .L_after_reduction_iEyBjAGEhdmCFpz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iEyBjAGEhdmCFpz: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_5_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_eojywxfxbxGnElA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_eojywxfxbxGnElA .L_16_blocks_overflow_eojywxfxbxGnElA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_eojywxfxbxGnElA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xzyrfzavvdvxobt subq 
$16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xzyrfzavvdvxobt .L_small_initial_partial_block_xzyrfzavvdvxobt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xzyrfzavvdvxobt: orq %r8,%r8 je .L_after_reduction_xzyrfzavvdvxobt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xzyrfzavvdvxobt: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_6_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_fefwvFrCitcygrh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_fefwvFrCitcygrh .L_16_blocks_overflow_fefwvFrCitcygrh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_fefwvFrCitcygrh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
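/*
 * Note on the tail handlers for more than four blocks (the
 * .L_last_num_blocks_is_5.. labels): they appear to drive a second, narrower
 * counter vector (%xmm3/%ymm3/%zmm3) alongside the full %zmm0, its width
 * matching the blocks past the first four.  The opmask %k1, loaded earlier
 * from byte64_len_to_mask_table(%rip) and indexed by the residual length
 * minus the already-covered 64-byte chunks, gates the masked vmovdqu8
 * load/store of the final partial chunk.  This is a reading of the code; the
 * table itself is defined elsewhere in this file.
 */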
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EGwsgDahgpEisFa subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EGwsgDahgpEisFa .L_small_initial_partial_block_EGwsgDahgpEisFa: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EGwsgDahgpEisFa: orq %r8,%r8 je .L_after_reduction_EGwsgDahgpEisFa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EGwsgDahgpEisFa: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_7_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_GiAftkxuDrwByoy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_GiAftkxuDrwByoy .L_16_blocks_overflow_GiAftkxuDrwByoy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_GiAftkxuDrwByoy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast 
%zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pvtnwvrCesGFzzt subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pvtnwvrCesGFzzt .L_small_initial_partial_block_pvtnwvrCesGFzzt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pvtnwvrCesGFzzt: orq %r8,%r8 je .L_after_reduction_pvtnwvrCesGFzzt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pvtnwvrCesGFzzt: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_8_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_fdotfBFcguDtbBo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_fdotfBFcguDtbBo .L_16_blocks_overflow_fdotfBFcguDtbBo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_fdotfBFcguDtbBo: 
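/*
 * Counter handling, as seen in every .L_last_num_blocks_is_* handler above
 * and below: the common path bumps the counter blocks in %zmm2 with the
 * preloaded increment constants in %zmm28/%zmm27; when %r15d (apparently the
 * cached low counter byte) is about to wrap (the cmpl $2xx,%r15d / jae
 * checks, with the immediate shrinking as more blocks are handled), the
 * overflow path byte-shuffles the blocks with %zmm29, adds ddq_add_1234 /
 * ddq_add_4444 so the carry propagates across bytes, and shuffles back.
 */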
vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wvodhAGehoxjCmp subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 
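/*
 * GHASH reduction: at this point the 512-bit accumulators have been folded
 * down to a 128-bit high half (%xmm0) and low half (%xmm3), and the constant
 * at POLY2(%rip) has begun a two-stage carry-less reduction modulo the GCM
 * polynomial.  The vpclmulqdq/vpslldq/vpsrldq steps that follow complete it,
 * and the closing vpternlogq $0x96 XORs the pieces into %xmm14, the register
 * that carries the hash forward.  (Reading of the instruction sequence, not
 * generator documentation.)
 */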
vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wvodhAGehoxjCmp .L_small_initial_partial_block_wvodhAGehoxjCmp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wvodhAGehoxjCmp: orq %r8,%r8 je .L_after_reduction_wvodhAGehoxjCmp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wvodhAGehoxjCmp: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_9_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_GcksGDvymbkGaeh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_GcksGDvymbkGaeh .L_16_blocks_overflow_GcksGDvymbkGaeh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_GcksGDvymbkGaeh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 
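/*
 * The hash-key powers multiplied against the freshly loaded blocks come from
 * the table that ossl_aes_gcm_init_avx512 wrote into the context at (%rsi):
 * H^1 appears to sit at 336(%rsi) with higher powers at successively lower
 * offsets, so an N-block tail can load H^N..H^1 as one contiguous vector
 * starting at 352 - 16*N (336 for one block, 288 for four, 208 for nine, and
 * so on), topped up with vinserti64x2 for the odd sizes.
 */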
vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uqlihfyhxyhihvk subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uqlihfyhxyhihvk .L_small_initial_partial_block_uqlihfyhxyhihvk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uqlihfyhxyhihvk: orq %r8,%r8 je .L_after_reduction_uqlihfyhxyhihvk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uqlihfyhxyhihvk: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_10_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_bjDavzoezpzksBl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_bjDavzoezpzksBl .L_16_blocks_overflow_bjDavzoezpzksBl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_bjDavzoezpzksBl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_thhwkdBkbzuszkb subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_thhwkdBkbzuszkb .L_small_initial_partial_block_thhwkdBkbzuszkb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_thhwkdBkbzuszkb: orq %r8,%r8 je .L_after_reduction_thhwkdBkbzuszkb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_thhwkdBkbzuszkb: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_11_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_epoBmnewvcDxoga vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_epoBmnewvcDxoga .L_16_blocks_overflow_epoBmnewvcDxoga: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_epoBmnewvcDxoga: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
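/*
 * Round count: these handlers run vaesenc with the keys at 16..144(%rdi) and
 * finish with vaesenclast against the key at 160(%rdi), i.e. ten rounds, so
 * this stretch appears to belong to the AES-128 code path; the 192- and
 * 256-bit key schedules are served by separate paths with additional rounds.
 */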
vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xCrDaEDvhzCAvdw subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xCrDaEDvhzCAvdw .L_small_initial_partial_block_xCrDaEDvhzCAvdw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xCrDaEDvhzCAvdw: orq %r8,%r8 je .L_after_reduction_xCrDaEDvhzCAvdw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xCrDaEDvhzCAvdw: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_12_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_jDebikuAmaaarvn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_jDebikuAmaaarvn .L_16_blocks_overflow_jDebikuAmaaarvn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_jDebikuAmaaarvn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ynohxakFGzjuDGi subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ynohxakFGzjuDGi .L_small_initial_partial_block_ynohxakFGzjuDGi: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ynohxakFGzjuDGi: orq %r8,%r8 je .L_after_reduction_ynohxakFGzjuDGi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ynohxakFGzjuDGi: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_13_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_hshekyDxCginrlC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_hshekyDxCginrlC .L_16_blocks_overflow_hshekyDxCginrlC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_hshekyDxCginrlC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq 
%xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_httDwjAaGCslaiE subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_httDwjAaGCslaiE .L_small_initial_partial_block_httDwjAaGCslaiE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_httDwjAaGCslaiE: orq %r8,%r8 je .L_after_reduction_httDwjAaGCslaiE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_httDwjAaGCslaiE: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_14_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_DrtmyDmpgCneBsy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_DrtmyDmpgCneBsy .L_16_blocks_overflow_DrtmyDmpgCneBsy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_DrtmyDmpgCneBsy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fAmeqrcqmahfygz subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fAmeqrcqmahfygz .L_small_initial_partial_block_fAmeqrcqmahfygz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fAmeqrcqmahfygz: orq %r8,%r8 je .L_after_reduction_fAmeqrcqmahfygz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fAmeqrcqmahfygz: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_15_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_jakbeEuDkermeem vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jakbeEuDkermeem .L_16_blocks_overflow_jakbeEuDkermeem: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jakbeEuDkermeem: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 
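/*
 * Tail handling (.L_last_num_blocks_is_N paths): the trailing source bytes are
 * read with zero-masked vmovdqu8 loads, %k1 having been looked up in
 * byte64_len_to_mask_table from the residual length, so the code never reads
 * or writes past the end of the caller's buffers.  The vaesenc rounds on the
 * counter blocks are interleaved with VPCLMULQDQ partial products that fold
 * blocks buffered on the stack into the accumulators %zmm24/%zmm25/%zmm26.
 */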
vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_czuljoFmwduytgq subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_czuljoFmwduytgq .L_small_initial_partial_block_czuljoFmwduytgq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_czuljoFmwduytgq: orq %r8,%r8 je .L_after_reduction_czuljoFmwduytgq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_czuljoFmwduytgq: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_16_digsBljoDvGeopi: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_pFvBGotBaidmClB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pFvBGotBaidmClB .L_16_blocks_overflow_pFvBGotBaidmClB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_pFvBGotBaidmClB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_rlrrckDhqtmvgrG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
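/*
 * End of the 16-block tail: the 512-bit carry-less product accumulators have
 * been folded down to 128 bits and reduced modulo the GHASH polynomial using
 * the POLY2 constant, leaving the updated hash value in %xmm14 for the
 * compute-done label that follows.
 */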
.L_small_initial_compute_done_rlrrckDhqtmvgrG: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rlrrckDhqtmvgrG: jmp .L_last_blocks_done_digsBljoDvGeopi .L_last_num_blocks_is_0_digsBljoDvGeopi: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_digsBljoDvGeopi: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_brADimEeCnCcDmv .L_encrypt_16_blocks_brADimEeCnCcDmv: cmpb $240,%r15b jae .L_16_blocks_overflow_mBiujfnyqjDacBo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_mBiujfnyqjDacBo .L_16_blocks_overflow_mBiujfnyqjDacBo: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_mBiujfnyqjDacBo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
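/*
 * .L_encrypt_16_blocks: each full 256-byte iteration encrypts four ZMM counter
 * registers with round keys broadcast from 0(%rdi) up to the last key at
 * 160(%rdi) (an 11-round AES-128 sequence in this path), while the same
 * instruction stream multiplies blocks buffered on the stack by precomputed
 * hash-key powers to advance GHASH; the byte-swapped ciphertext produced here
 * is then stored back to the stack for hashing on the next pass.
 */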
vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_hpinkedxAsgwrDG cmpl 
$8,%r10d je .L_last_num_blocks_is_8_hpinkedxAsgwrDG jb .L_last_num_blocks_is_7_1_hpinkedxAsgwrDG cmpl $12,%r10d je .L_last_num_blocks_is_12_hpinkedxAsgwrDG jb .L_last_num_blocks_is_11_9_hpinkedxAsgwrDG cmpl $15,%r10d je .L_last_num_blocks_is_15_hpinkedxAsgwrDG ja .L_last_num_blocks_is_16_hpinkedxAsgwrDG cmpl $14,%r10d je .L_last_num_blocks_is_14_hpinkedxAsgwrDG jmp .L_last_num_blocks_is_13_hpinkedxAsgwrDG .L_last_num_blocks_is_11_9_hpinkedxAsgwrDG: cmpl $10,%r10d je .L_last_num_blocks_is_10_hpinkedxAsgwrDG ja .L_last_num_blocks_is_11_hpinkedxAsgwrDG jmp .L_last_num_blocks_is_9_hpinkedxAsgwrDG .L_last_num_blocks_is_7_1_hpinkedxAsgwrDG: cmpl $4,%r10d je .L_last_num_blocks_is_4_hpinkedxAsgwrDG jb .L_last_num_blocks_is_3_1_hpinkedxAsgwrDG cmpl $6,%r10d ja .L_last_num_blocks_is_7_hpinkedxAsgwrDG je .L_last_num_blocks_is_6_hpinkedxAsgwrDG jmp .L_last_num_blocks_is_5_hpinkedxAsgwrDG .L_last_num_blocks_is_3_1_hpinkedxAsgwrDG: cmpl $2,%r10d ja .L_last_num_blocks_is_3_hpinkedxAsgwrDG je .L_last_num_blocks_is_2_hpinkedxAsgwrDG .L_last_num_blocks_is_1_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_fBBmqqamxsbkcrt vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_fBBmqqamxsbkcrt .L_16_blocks_overflow_fBBmqqamxsbkcrt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_fBBmqqamxsbkcrt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 
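/*
 * Horizontal GHASH fold: the high (%zmm14) and low (%zmm7) halves of the
 * carry-less products are XORed lane by lane from 512 down to 128 bits, and
 * the POLY2 multiplications that follow reduce the result modulo the GHASH
 * polynomial, leaving the new hash state in %xmm14.
 */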
vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_lrfgmFpfobGvwfj subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lrfgmFpfobGvwfj .L_small_initial_partial_block_lrfgmFpfobGvwfj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_lrfgmFpfobGvwfj .L_small_initial_compute_done_lrfgmFpfobGvwfj: .L_after_reduction_lrfgmFpfobGvwfj: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_2_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_xDanrAoaAcACiFw vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_xDanrAoaAcACiFw .L_16_blocks_overflow_xDanrAoaAcACiFw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_xDanrAoaAcACiFw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rgsstcnEqnxrxBs subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rgsstcnEqnxrxBs .L_small_initial_partial_block_rgsstcnEqnxrxBs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rgsstcnEqnxrxBs: orq %r8,%r8 je .L_after_reduction_rgsstcnEqnxrxBs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rgsstcnEqnxrxBs: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_3_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl 
$253,%r15d jae .L_16_blocks_overflow_lrqqcheobutysur vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_lrqqcheobutysur .L_16_blocks_overflow_lrqqcheobutysur: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_lrqqcheobutysur: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xejmrnqBpubjbjg subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xejmrnqBpubjbjg .L_small_initial_partial_block_xejmrnqBpubjbjg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xejmrnqBpubjbjg: orq %r8,%r8 je .L_after_reduction_xejmrnqBpubjbjg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xejmrnqBpubjbjg: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_4_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_gjemvxDziwfmcyi vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_gjemvxDziwfmcyi .L_16_blocks_overflow_gjemvxDziwfmcyi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_gjemvxDziwfmcyi: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq 
$0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fCcphAbbvbdCpEo subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fCcphAbbvbdCpEo .L_small_initial_partial_block_fCcphAbbvbdCpEo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fCcphAbbvbdCpEo: orq %r8,%r8 je .L_after_reduction_fCcphAbbvbdCpEo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fCcphAbbvbdCpEo: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_5_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_ftkjlfgrvFmBAqj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_ftkjlfgrvFmBAqj .L_16_blocks_overflow_ftkjlfgrvFmBAqj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_ftkjlfgrvFmBAqj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GcmEpgzDnksqGvv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 
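/*
 * GHASH of the tail blocks: every 128-bit block contributes four 64x64-bit
 * carry-less cross products ($0x00, $0x01, $0x10, $0x11) against a precomputed
 * power of H fetched from the table addressed via %rsi; the partial products
 * are then combined with vpternlogq/vpxorq ahead of the reduction.
 */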
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GcmEpgzDnksqGvv .L_small_initial_partial_block_GcmEpgzDnksqGvv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GcmEpgzDnksqGvv: orq %r8,%r8 je .L_after_reduction_GcmEpgzDnksqGvv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GcmEpgzDnksqGvv: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_6_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_wcFtAwbEGtnhhov vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_wcFtAwbEGtnhhov .L_16_blocks_overflow_wcFtAwbEGtnhhov: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_wcFtAwbEGtnhhov: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ljhumqErtfjivdq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ljhumqErtfjivdq .L_small_initial_partial_block_ljhumqErtfjivdq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 
vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ljhumqErtfjivdq: orq %r8,%r8 je .L_after_reduction_ljhumqErtfjivdq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ljhumqErtfjivdq: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_7_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_xipoAqDkcCyBFhx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_xipoAqDkcCyBFhx .L_16_blocks_overflow_xipoAqDkcCyBFhx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_xipoAqDkcCyBFhx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jeohFFoGiiGxanC subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jeohFFoGiiGxanC .L_small_initial_partial_block_jeohFFoGiiGxanC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jeohFFoGiiGxanC: orq %r8,%r8 je .L_after_reduction_jeohFFoGiiGxanC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jeohFFoGiiGxanC: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_8_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_CxhquljwEiGywcd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_CxhquljwEiGywcd .L_16_blocks_overflow_CxhquljwEiGywcd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_CxhquljwEiGywcd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq 
%xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eqywyFyndjkBDnx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eqywyFyndjkBDnx .L_small_initial_partial_block_eqywyFyndjkBDnx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eqywyFyndjkBDnx: orq %r8,%r8 je .L_after_reduction_eqywyFyndjkBDnx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eqywyFyndjkBDnx: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_9_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_tqfxslkwuCurEnc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_tqfxslkwuCurEnc .L_16_blocks_overflow_tqfxslkwuCurEnc: 
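/*
 * Counter-overflow path (descriptive comment): %r15d indicates the low
 * counter byte would wrap, so the counter block in %zmm2 is byte-reflected
 * via the shuffle mask kept in %zmm29, incremented with the
 * ddq_add_1234/ddq_add_4444 constants, and reflected back before being used
 * as AES-CTR input for this tail branch.
 */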
vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_tqfxslkwuCurEnc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pxwcCmexoxpnkgA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pxwcCmexoxpnkgA .L_small_initial_partial_block_pxwcCmexoxpnkgA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pxwcCmexoxpnkgA: orq %r8,%r8 je .L_after_reduction_pxwcCmexoxpnkgA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pxwcCmexoxpnkgA: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_10_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_tiwCrijFxfsopuz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_tiwCrijFxfsopuz .L_16_blocks_overflow_tiwCrijFxfsopuz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_tiwCrijFxfsopuz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rjgbwiCDGnxhaGp subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rjgbwiCDGnxhaGp .L_small_initial_partial_block_rjgbwiCDGnxhaGp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rjgbwiCDGnxhaGp: orq %r8,%r8 je .L_after_reduction_rjgbwiCDGnxhaGp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rjgbwiCDGnxhaGp: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_11_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae 
.L_16_blocks_overflow_wphxdqsnBGrxkBa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_wphxdqsnBGrxkBa .L_16_blocks_overflow_wphxdqsnBGrxkBa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_wphxdqsnBGrxkBa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DAeDyvlteBcjnnm subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DAeDyvlteBcjnnm .L_small_initial_partial_block_DAeDyvlteBcjnnm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DAeDyvlteBcjnnm: orq %r8,%r8 je .L_after_reduction_DAeDyvlteBcjnnm vpxorq 
%xmm7,%xmm14,%xmm14 .L_after_reduction_DAeDyvlteBcjnnm: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_12_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_btzqkvdAeDABvcj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_btzqkvdAeDABvcj .L_16_blocks_overflow_btzqkvdAeDABvcj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_btzqkvdAeDABvcj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq 
$8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BAFapfuAGyFkstm subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BAFapfuAGyFkstm .L_small_initial_partial_block_BAFapfuAGyFkstm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BAFapfuAGyFkstm: orq %r8,%r8 je .L_after_reduction_BAFapfuAGyFkstm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BAFapfuAGyFkstm: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_13_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_eqBacrjkweGnBBv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_eqBacrjkweGnBBv .L_16_blocks_overflow_eqBacrjkweGnBBv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_eqBacrjkweGnBBv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zzCAagwwuuueoBh subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zzCAagwwuuueoBh .L_small_initial_partial_block_zzCAagwwuuueoBh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zzCAagwwuuueoBh: orq %r8,%r8 je .L_after_reduction_zzCAagwwuuueoBh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zzCAagwwuuueoBh: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_14_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_hBvbhuzsjeqFuma vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_hBvbhuzsjeqFuma .L_16_blocks_overflow_hBvbhuzsjeqFuma: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_hBvbhuzsjeqFuma: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 
128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mwionbCzEjjlanp subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
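/* Annotation (descriptive, not generated): the $0x11 (high) and $0x00 (low) GHASH
   partial products were accumulated in %zmm0 and %zmm3; both halves are folded from
   512 down to 128 bits here, and the POLY2-based reduction that follows collapses
   them into the running tag in %xmm14. */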
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mwionbCzEjjlanp .L_small_initial_partial_block_mwionbCzEjjlanp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mwionbCzEjjlanp: orq %r8,%r8 je .L_after_reduction_mwionbCzEjjlanp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mwionbCzEjjlanp: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_15_hpinkedxAsgwrDG: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_BDaqedvcvzqmjwo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_BDaqedvcvzqmjwo .L_16_blocks_overflow_BDaqedvcvzqmjwo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_BDaqedvcvzqmjwo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 
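/* Annotation (descriptive, not generated): 15-block tail path. Each AES round on the
   counter blocks in %zmm0/%zmm3/%zmm4/%zmm5 is interleaved with VPCLMULQDQ partial
   products that GHASH the ciphertext blocks saved on the stack by the previous
   16-block pass against hash-key powers likewise staged on the stack. */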
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EFDnDGjBfhFbjps subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EFDnDGjBfhFbjps .L_small_initial_partial_block_EFDnDGjBfhFbjps: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EFDnDGjBfhFbjps: orq %r8,%r8 je .L_after_reduction_EFDnDGjBfhFbjps vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EFDnDGjBfhFbjps: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_16_hpinkedxAsgwrDG: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_etaGdjDbzcppuhm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_etaGdjDbzcppuhm .L_16_blocks_overflow_etaGdjDbzcppuhm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_etaGdjDbzcppuhm: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq 
%ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_zcehcCvffqhlrEC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zcehcCvffqhlrEC: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zcehcCvffqhlrEC: jmp .L_last_blocks_done_hpinkedxAsgwrDG .L_last_num_blocks_is_0_hpinkedxAsgwrDG: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 
640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_hpinkedxAsgwrDG: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_brADimEeCnCcDmv .L_message_below_32_blocks_brADimEeCnCcDmv: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_hlnFoocmixcFBsB vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq 
$0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_hlnFoocmixcFBsB: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_ytkmwztBxmufdeg cmpl $8,%r10d je .L_last_num_blocks_is_8_ytkmwztBxmufdeg jb .L_last_num_blocks_is_7_1_ytkmwztBxmufdeg cmpl $12,%r10d je .L_last_num_blocks_is_12_ytkmwztBxmufdeg jb .L_last_num_blocks_is_11_9_ytkmwztBxmufdeg cmpl $15,%r10d je .L_last_num_blocks_is_15_ytkmwztBxmufdeg ja .L_last_num_blocks_is_16_ytkmwztBxmufdeg cmpl $14,%r10d je .L_last_num_blocks_is_14_ytkmwztBxmufdeg jmp .L_last_num_blocks_is_13_ytkmwztBxmufdeg .L_last_num_blocks_is_11_9_ytkmwztBxmufdeg: cmpl $10,%r10d je .L_last_num_blocks_is_10_ytkmwztBxmufdeg ja .L_last_num_blocks_is_11_ytkmwztBxmufdeg jmp .L_last_num_blocks_is_9_ytkmwztBxmufdeg .L_last_num_blocks_is_7_1_ytkmwztBxmufdeg: cmpl $4,%r10d je .L_last_num_blocks_is_4_ytkmwztBxmufdeg jb .L_last_num_blocks_is_3_1_ytkmwztBxmufdeg cmpl $6,%r10d ja .L_last_num_blocks_is_7_ytkmwztBxmufdeg je .L_last_num_blocks_is_6_ytkmwztBxmufdeg jmp .L_last_num_blocks_is_5_ytkmwztBxmufdeg .L_last_num_blocks_is_3_1_ytkmwztBxmufdeg: cmpl $2,%r10d ja .L_last_num_blocks_is_3_ytkmwztBxmufdeg je .L_last_num_blocks_is_2_ytkmwztBxmufdeg .L_last_num_blocks_is_1_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_bGwqvrBoAiaAwkr vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_bGwqvrBoAiaAwkr .L_16_blocks_overflow_bGwqvrBoAiaAwkr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_bGwqvrBoAiaAwkr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc 
%xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_dqohylvpeBErAsj subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dqohylvpeBErAsj .L_small_initial_partial_block_dqohylvpeBErAsj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_dqohylvpeBErAsj .L_small_initial_compute_done_dqohylvpeBErAsj: .L_after_reduction_dqohylvpeBErAsj: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_2_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_lsDChrkFfFrGvvk vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_lsDChrkFfFrGvvk .L_16_blocks_overflow_lsDChrkFfFrGvvk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_lsDChrkFfFrGvvk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Bgmdyvgptvfwdit subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Bgmdyvgptvfwdit .L_small_initial_partial_block_Bgmdyvgptvfwdit: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Bgmdyvgptvfwdit: orq %r8,%r8 je .L_after_reduction_Bgmdyvgptvfwdit vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Bgmdyvgptvfwdit: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_3_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_srEocbwAwxsxpma vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_srEocbwAwxsxpma 
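/* Annotation (descriptive, not generated): counter-overflow handling. The cmpl on
   %r15d (low counter byte) above decides whether the big-endian counter block can be
   incremented with a plain vpaddd; if the byte would carry, the overflow path below
   byte-swaps the counter to little-endian, adds the ddq_add_* constants, and swaps
   it back. */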
.L_16_blocks_overflow_srEocbwAwxsxpma: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_srEocbwAwxsxpma: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ErkzfxFAbndCAAg subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ErkzfxFAbndCAAg .L_small_initial_partial_block_ErkzfxFAbndCAAg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ErkzfxFAbndCAAg: orq %r8,%r8 je .L_after_reduction_ErkzfxFAbndCAAg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ErkzfxFAbndCAAg: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_4_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_wbyjFiCBFhEhwdm vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_wbyjFiCBFhEhwdm .L_16_blocks_overflow_wbyjFiCBFhEhwdm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_wbyjFiCBFhEhwdm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sEeExElgbeebmrl subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sEeExElgbeebmrl .L_small_initial_partial_block_sEeExElgbeebmrl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sEeExElgbeebmrl: orq %r8,%r8 je .L_after_reduction_sEeExElgbeebmrl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sEeExElgbeebmrl: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_5_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_FhnyaskgxleEyeh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_FhnyaskgxleEyeh .L_16_blocks_overflow_FhnyaskgxleEyeh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_FhnyaskgxleEyeh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wcgcyCwrColDBul subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wcgcyCwrColDBul .L_small_initial_partial_block_wcgcyCwrColDBul: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 
vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wcgcyCwrColDBul: orq %r8,%r8 je .L_after_reduction_wcgcyCwrColDBul vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wcgcyCwrColDBul: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_6_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_EfyidiDbmAaAaju vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_EfyidiDbmAaAaju .L_16_blocks_overflow_EfyidiDbmAaAaju: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_EfyidiDbmAaAaju: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jGjykEdEyDattqe subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jGjykEdEyDattqe .L_small_initial_partial_block_jGjykEdEyDattqe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jGjykEdEyDattqe: orq %r8,%r8 je .L_after_reduction_jGjykEdEyDattqe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jGjykEdEyDattqe: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_7_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_mzDdvEgkDwBlewp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_mzDdvEgkDwBlewp .L_16_blocks_overflow_mzDdvEgkDwBlewp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_mzDdvEgkDwBlewp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 
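/* Annotation (descriptive, an interpretation of the surrounding code): tail of the
   "message below 32 blocks" path. Same stitched AES+GHASH pattern, except the
   hash-key powers are read from the stack table filled by the precomputation guarded
   by .L_skip_hkeys_precomputation_hlnFoocmixcFBsB, indexed through %rbx (512 minus
   the 16-byte-aligned remaining length) so the previous pass's blocks are multiplied
   by powers that account for the tail blocks still to come. */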
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zwgGbbACgGfeFja subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zwgGbbACgGfeFja .L_small_initial_partial_block_zwgGbbACgGfeFja: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq 
$0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zwgGbbACgGfeFja: orq %r8,%r8 je .L_after_reduction_zwgGbbACgGfeFja vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zwgGbbACgGfeFja: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_8_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_jqmGdhzdkozCBlA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_jqmGdhzdkozCBlA .L_16_blocks_overflow_jqmGdhzdkozCBlA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_jqmGdhzdkozCBlA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Daizbjyimqaduru subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Daizbjyimqaduru .L_small_initial_partial_block_Daizbjyimqaduru: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Daizbjyimqaduru: orq %r8,%r8 je .L_after_reduction_Daizbjyimqaduru vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Daizbjyimqaduru: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_9_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_CDuwyvGbafyeBuk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp 
.L_16_blocks_ok_CDuwyvGbafyeBuk .L_16_blocks_overflow_CDuwyvGbafyeBuk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_CDuwyvGbafyeBuk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kpAafwlxkcfbCCh subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kpAafwlxkcfbCCh .L_small_initial_partial_block_kpAafwlxkcfbCCh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kpAafwlxkcfbCCh: orq %r8,%r8 je .L_after_reduction_kpAafwlxkcfbCCh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kpAafwlxkcfbCCh: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_10_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_tDtiElGDCfanulC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_tDtiElGDCfanulC .L_16_blocks_overflow_tDtiElGDCfanulC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_tDtiElGDCfanulC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zphfokajCjwqcAg subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zphfokajCjwqcAg .L_small_initial_partial_block_zphfokajCjwqcAg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zphfokajCjwqcAg: orq %r8,%r8 je .L_after_reduction_zphfokajCjwqcAg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zphfokajCjwqcAg: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_11_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_wqmiytsuGwmqxEk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_wqmiytsuGwmqxEk .L_16_blocks_overflow_wqmiytsuGwmqxEk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_wqmiytsuGwmqxEk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 
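/*
 * Round keys are broadcast on the fly from the schedule at (%rdi)
 * (vbroadcastf64x2 into %zmm30/%zmm31, used alternately), so the key
 * loads overlap with the vaesenc and vpclmulqdq work.
 */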
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DlBrprmzzykyokm subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DlBrprmzzykyokm .L_small_initial_partial_block_DlBrprmzzykyokm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DlBrprmzzykyokm: orq %r8,%r8 je .L_after_reduction_DlBrprmzzykyokm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DlBrprmzzykyokm: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_12_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_annCtoGejoBwwxn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_annCtoGejoBwwxn .L_16_blocks_overflow_annCtoGejoBwwxn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_annCtoGejoBwwxn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_viBlGurDavwztrf subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_viBlGurDavwztrf .L_small_initial_partial_block_viBlGurDavwztrf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_viBlGurDavwztrf: orq %r8,%r8 je .L_after_reduction_viBlGurDavwztrf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_viBlGurDavwztrf: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_13_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_zmshcCvwkdwGlaB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_zmshcCvwkdwGlaB .L_16_blocks_overflow_zmshcCvwkdwGlaB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_zmshcCvwkdwGlaB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 
vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kqdfAoFcBDkeGbm subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kqdfAoFcBDkeGbm .L_small_initial_partial_block_kqdfAoFcBDkeGbm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kqdfAoFcBDkeGbm: orq %r8,%r8 je .L_after_reduction_kqdfAoFcBDkeGbm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kqdfAoFcBDkeGbm: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_14_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_boziaaCCygjjfxw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_boziaaCCygjjfxw .L_16_blocks_overflow_boziaaCCygjjfxw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_boziaaCCygjjfxw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
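/*
 * Block counts that are not a multiple of four run the spill-over counter
 * blocks at reduced width (%xmm5 for one block, %ymm5 for two); the final,
 * possibly partial vector is loaded and stored through the %k1 byte mask
 * built from byte64_len_to_mask_table.
 */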
vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_znbGdxrosrCeabB subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_znbGdxrosrCeabB .L_small_initial_partial_block_znbGdxrosrCeabB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_znbGdxrosrCeabB: orq %r8,%r8 je .L_after_reduction_znbGdxrosrCeabB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_znbGdxrosrCeabB: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_15_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_rliugxzwdyFGiBD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_rliugxzwdyFGiBD .L_16_blocks_overflow_rliugxzwdyFGiBD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_rliugxzwdyFGiBD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_olnbAdcngmvvEdn subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_olnbAdcngmvvEdn .L_small_initial_partial_block_olnbAdcngmvvEdn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_olnbAdcngmvvEdn: orq %r8,%r8 je .L_after_reduction_olnbAdcngmvvEdn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_olnbAdcngmvvEdn: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_16_ytkmwztBxmufdeg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_gmEGrjFikmwGcAm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_gmEGrjFikmwGcAm .L_16_blocks_overflow_gmEGrjFikmwGcAm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 
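/*
 * Overflow path: when the low byte of the counter would wrap (the cmpl
 * check above), the counter blocks are byte-reflected, incremented via
 * ddq_add_1234/ddq_add_4444 so the carry propagates, and reflected back
 * with %zmm29 before encryption.
 */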
vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_gmEGrjFikmwGcAm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_dplntcAkoiBEkDo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dplntcAkoiBEkDo: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dplntcAkoiBEkDo: jmp .L_last_blocks_done_ytkmwztBxmufdeg .L_last_num_blocks_is_0_ytkmwztBxmufdeg: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_ytkmwztBxmufdeg: vpshufb 
%xmm29,%xmm2,%xmm2 jmp .L_ghash_done_brADimEeCnCcDmv .L_message_below_equal_16_blocks_brADimEeCnCcDmv: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_nmhEfDfgEBvcjnt jl .L_small_initial_num_blocks_is_7_1_nmhEfDfgEBvcjnt cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_nmhEfDfgEBvcjnt jl .L_small_initial_num_blocks_is_11_9_nmhEfDfgEBvcjnt cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_nmhEfDfgEBvcjnt cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_nmhEfDfgEBvcjnt cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_nmhEfDfgEBvcjnt jmp .L_small_initial_num_blocks_is_13_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_11_9_nmhEfDfgEBvcjnt: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_nmhEfDfgEBvcjnt cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_nmhEfDfgEBvcjnt jmp .L_small_initial_num_blocks_is_9_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_7_1_nmhEfDfgEBvcjnt: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_nmhEfDfgEBvcjnt jl .L_small_initial_num_blocks_is_3_1_nmhEfDfgEBvcjnt cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_nmhEfDfgEBvcjnt cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_nmhEfDfgEBvcjnt jmp .L_small_initial_num_blocks_is_5_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_3_1_nmhEfDfgEBvcjnt: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_nmhEfDfgEBvcjnt cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_1_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_AyfivemhvfDjwew subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AyfivemhvfDjwew .L_small_initial_partial_block_AyfivemhvfDjwew: 
movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_AyfivemhvfDjwew .L_small_initial_compute_done_AyfivemhvfDjwew: .L_after_reduction_AyfivemhvfDjwew: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_2_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mFdfDiDtuhyrCwk subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mFdfDiDtuhyrCwk .L_small_initial_partial_block_mFdfDiDtuhyrCwk: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mFdfDiDtuhyrCwk: orq %r8,%r8 je .L_after_reduction_mFdfDiDtuhyrCwk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mFdfDiDtuhyrCwk: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt 
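/*
 * Annotation (descriptive only, inferred from the surrounding generated code):
 * each .L_small_initial_num_blocks_is_* path below handles its block count
 * with the same pattern.  The counter in %zmm2 is expanded with the ddq_add_*
 * constants and byte-swapped via SHUF_MASK, the AES rounds are applied with
 * round keys broadcast from (%rdi), the result is XORed with the input loaded
 * from (%rcx,%r11) under the %k1 length mask taken from
 * byte64_len_to_mask_table, and the output is written to (%r9,%r11) with the
 * same mask.  The processed blocks are then byte-reflected and multiplied
 * (VPCLMULQDQ) against the precomputed hash-key powers kept in the context at
 * (%rsi), and the result is reduced with the POLY2 constant into the running
 * GHASH accumulator in %xmm14.
 */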
.L_small_initial_num_blocks_is_3_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AvGtGumzxshjiFB subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AvGtGumzxshjiFB .L_small_initial_partial_block_AvGtGumzxshjiFB: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AvGtGumzxshjiFB: orq %r8,%r8 je .L_after_reduction_AvGtGumzxshjiFB vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AvGtGumzxshjiFB: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_4_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 
vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DbentnbaeCzAufz subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DbentnbaeCzAufz .L_small_initial_partial_block_DbentnbaeCzAufz: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DbentnbaeCzAufz: orq %r8,%r8 je .L_after_reduction_DbentnbaeCzAufz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_DbentnbaeCzAufz: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_5_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 
vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dnEAtijzGEDlswn subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dnEAtijzGEDlswn .L_small_initial_partial_block_dnEAtijzGEDlswn: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dnEAtijzGEDlswn: orq %r8,%r8 je .L_after_reduction_dnEAtijzGEDlswn vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dnEAtijzGEDlswn: jmp 
.L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_6_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_umqipkezFkCyFdu subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_umqipkezFkCyFdu .L_small_initial_partial_block_umqipkezFkCyFdu: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_umqipkezFkCyFdu: orq %r8,%r8 je .L_after_reduction_umqipkezFkCyFdu vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_umqipkezFkCyFdu: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_7_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lEGtnzekhyuwBFz subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lEGtnzekhyuwBFz .L_small_initial_partial_block_lEGtnzekhyuwBFz: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lEGtnzekhyuwBFz: orq %r8,%r8 je .L_after_reduction_lEGtnzekhyuwBFz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_lEGtnzekhyuwBFz: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_8_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EasGBEsimbhszDy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 
vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EasGBEsimbhszDy .L_small_initial_partial_block_EasGBEsimbhszDy: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EasGBEsimbhszDy: orq %r8,%r8 je .L_after_reduction_EasGBEsimbhszDy vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EasGBEsimbhszDy: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_9_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DlhndmhlkxypvAb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DlhndmhlkxypvAb .L_small_initial_partial_block_DlhndmhlkxypvAb: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DlhndmhlkxypvAb: orq %r8,%r8 je .L_after_reduction_DlhndmhlkxypvAb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_DlhndmhlkxypvAb: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_10_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cwsdomEqheptkED subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 
vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cwsdomEqheptkED .L_small_initial_partial_block_cwsdomEqheptkED: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cwsdomEqheptkED: orq %r8,%r8 je .L_after_reduction_cwsdomEqheptkED vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_cwsdomEqheptkED: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_11_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qxeFvgzdwFFywqx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qxeFvgzdwFFywqx .L_small_initial_partial_block_qxeFvgzdwFFywqx: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq 
%xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qxeFvgzdwFFywqx: orq %r8,%r8 je .L_after_reduction_qxeFvgzdwFFywqx vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_qxeFvgzdwFFywqx: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_12_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oqzAvlGuDiExAmm subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq 
$0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oqzAvlGuDiExAmm .L_small_initial_partial_block_oqzAvlGuDiExAmm: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oqzAvlGuDiExAmm: orq %r8,%r8 je .L_after_reduction_oqzAvlGuDiExAmm vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_oqzAvlGuDiExAmm: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_13_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yqGygqlhwnnpjbq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yqGygqlhwnnpjbq .L_small_initial_partial_block_yqGygqlhwnnpjbq: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 
224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yqGygqlhwnnpjbq: orq %r8,%r8 je .L_after_reduction_yqGygqlhwnnpjbq vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yqGygqlhwnnpjbq: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_14_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq 
%zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wByexunpeunlcgC subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wByexunpeunlcgC .L_small_initial_partial_block_wByexunpeunlcgC: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 
vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wByexunpeunlcgC: orq %r8,%r8 je .L_after_reduction_wByexunpeunlcgC vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_wByexunpeunlcgC: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_15_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sAhCDvCwGcBErvs subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq 
$0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sAhCDvCwGcBErvs .L_small_initial_partial_block_sAhCDvCwGcBErvs: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sAhCDvCwGcBErvs: orq %r8,%r8 je .L_after_reduction_sAhCDvCwGcBErvs vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_sAhCDvCwGcBErvs: jmp .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt .L_small_initial_num_blocks_is_16_nmhEfDfgEBvcjnt: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd 
ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_BGcpniuuBjzyonj: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 
vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BGcpniuuBjzyonj: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_BGcpniuuBjzyonj: .L_small_initial_blocks_encrypted_nmhEfDfgEBvcjnt: .L_ghash_done_brADimEeCnCcDmv: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_brADimEeCnCcDmv: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_done_yiifChpfBbxhAhe xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_EexishzBqqwurDt movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_EexishzBqqwurDt subq %r13,%r12 .L_no_extra_mask_EexishzBqqwurDt: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_EexishzBqqwurDt vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_EexishzBqqwurDt .L_partial_incomplete_EexishzBqqwurDt: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_EexishzBqqwurDt: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_EexishzBqqwurDt: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_yiifChpfBbxhAhe cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_yiifChpfBbxhAhe vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_tfgagBztCGiipfj vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_tfgagBztCGiipfj .L_next_16_overflow_tfgagBztCGiipfj: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb 
%zmm29,%zmm12,%zmm12 .L_next_16_ok_tfgagBztCGiipfj: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_wuytBaevFghAmde vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_wuytBaevFghAmde: cmpq $512,%r8 jb .L_message_below_32_blocks_yiifChpfBbxhAhe cmpb $240,%r15b jae .L_next_16_overflow_nzEGCllDaFxsseu vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_nzEGCllDaFxsseu .L_next_16_overflow_nzEGCllDaFxsseu: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_nzEGCllDaFxsseu: vshufi64x2 
$255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_CDApkmzFaysFbmb vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq 
$0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq 
$0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_CDApkmzFaysFbmb: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_yiifChpfBbxhAhe .L_encrypt_big_nblocks_yiifChpfBbxhAhe: cmpb $240,%r15b jae .L_16_blocks_overflow_EkchfDegrAlelEj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EkchfDegrAlelEj .L_16_blocks_overflow_EkchfDegrAlelEj: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EkchfDegrAlelEj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_ymdbteyxuoqtqnl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ymdbteyxuoqtqnl .L_16_blocks_overflow_ymdbteyxuoqtqnl: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ymdbteyxuoqtqnl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 
vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_tyfBFhaGurfjEFr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_tyfBFhaGurfjEFr .L_16_blocks_overflow_tyfBFhaGurfjEFr: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_tyfBFhaGurfjEFr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 
512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_yiifChpfBbxhAhe .L_no_more_big_nblocks_yiifChpfBbxhAhe: cmpq $512,%r8 jae .L_encrypt_32_blocks_yiifChpfBbxhAhe cmpq $256,%r8 jae .L_encrypt_16_blocks_yiifChpfBbxhAhe .L_encrypt_0_blocks_ghash_32_yiifChpfBbxhAhe: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 
128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_EnDAnndDABDpwrg cmpl $8,%r10d je .L_last_num_blocks_is_8_EnDAnndDABDpwrg jb .L_last_num_blocks_is_7_1_EnDAnndDABDpwrg cmpl $12,%r10d je .L_last_num_blocks_is_12_EnDAnndDABDpwrg jb .L_last_num_blocks_is_11_9_EnDAnndDABDpwrg cmpl $15,%r10d je .L_last_num_blocks_is_15_EnDAnndDABDpwrg ja .L_last_num_blocks_is_16_EnDAnndDABDpwrg cmpl $14,%r10d je .L_last_num_blocks_is_14_EnDAnndDABDpwrg jmp .L_last_num_blocks_is_13_EnDAnndDABDpwrg .L_last_num_blocks_is_11_9_EnDAnndDABDpwrg: cmpl $10,%r10d je .L_last_num_blocks_is_10_EnDAnndDABDpwrg ja .L_last_num_blocks_is_11_EnDAnndDABDpwrg jmp .L_last_num_blocks_is_9_EnDAnndDABDpwrg .L_last_num_blocks_is_7_1_EnDAnndDABDpwrg: cmpl $4,%r10d je .L_last_num_blocks_is_4_EnDAnndDABDpwrg jb .L_last_num_blocks_is_3_1_EnDAnndDABDpwrg cmpl $6,%r10d ja .L_last_num_blocks_is_7_EnDAnndDABDpwrg je .L_last_num_blocks_is_6_EnDAnndDABDpwrg jmp .L_last_num_blocks_is_5_EnDAnndDABDpwrg .L_last_num_blocks_is_3_1_EnDAnndDABDpwrg: cmpl $2,%r10d ja .L_last_num_blocks_is_3_EnDAnndDABDpwrg je .L_last_num_blocks_is_2_EnDAnndDABDpwrg .L_last_num_blocks_is_1_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_GgCAgFtCzDDmtga vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_GgCAgFtCzDDmtga .L_16_blocks_overflow_GgCAgFtCzDDmtga: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_GgCAgFtCzDDmtga: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_muErgpqjgcDnuvy subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_muErgpqjgcDnuvy .L_small_initial_partial_block_muErgpqjgcDnuvy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_muErgpqjgcDnuvy .L_small_initial_compute_done_muErgpqjgcDnuvy: .L_after_reduction_muErgpqjgcDnuvy: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_2_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_sGdlxeauwrjkrtA vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_sGdlxeauwrjkrtA .L_16_blocks_overflow_sGdlxeauwrjkrtA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_sGdlxeauwrjkrtA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mixrqrhnvplnBsa subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mixrqrhnvplnBsa .L_small_initial_partial_block_mixrqrhnvplnBsa: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mixrqrhnvplnBsa: orq %r8,%r8 je .L_after_reduction_mixrqrhnvplnBsa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mixrqrhnvplnBsa: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_3_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_whibjFbDFpmwsdg vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_whibjFbDFpmwsdg .L_16_blocks_overflow_whibjFbDFpmwsdg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_whibjFbDFpmwsdg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lAnoBCFfkdkhBpw subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lAnoBCFfkdkhBpw .L_small_initial_partial_block_lAnoBCFfkdkhBpw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lAnoBCFfkdkhBpw: orq %r8,%r8 je .L_after_reduction_lAnoBCFfkdkhBpw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lAnoBCFfkdkhBpw: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_4_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_CACaGmtylGFBBes vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_CACaGmtylGFBBes .L_16_blocks_overflow_CACaGmtylGFBBes: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_CACaGmtylGFBBes: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bDpjzbsFvemyBzb subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bDpjzbsFvemyBzb .L_small_initial_partial_block_bDpjzbsFvemyBzb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bDpjzbsFvemyBzb: orq %r8,%r8 je .L_after_reduction_bDpjzbsFvemyBzb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bDpjzbsFvemyBzb: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_5_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_imFzBFrgiBtDFwx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_imFzBFrgiBtDFwx .L_16_blocks_overflow_imFzBFrgiBtDFwx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_imFzBFrgiBtDFwx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vnnCjDqmzbcdpik subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vnnCjDqmzbcdpik .L_small_initial_partial_block_vnnCjDqmzbcdpik: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vnnCjDqmzbcdpik: orq %r8,%r8 je .L_after_reduction_vnnCjDqmzbcdpik vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vnnCjDqmzbcdpik: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_6_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_pAdtiatocvAeptw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_pAdtiatocvAeptw .L_16_blocks_overflow_pAdtiatocvAeptw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_pAdtiatocvAeptw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gvfhgipCiigqdGj subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gvfhgipCiigqdGj .L_small_initial_partial_block_gvfhgipCiigqdGj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
.L_small_initial_compute_done_gvfhgipCiigqdGj: orq %r8,%r8 je .L_after_reduction_gvfhgipCiigqdGj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gvfhgipCiigqdGj: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_7_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_xxGFqeesBsuBajd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_xxGFqeesBsuBajd .L_16_blocks_overflow_xxGFqeesBsuBajd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_xxGFqeesBsuBajd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nFyvcbadpdjqnGl subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 
vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nFyvcbadpdjqnGl .L_small_initial_partial_block_nFyvcbadpdjqnGl: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nFyvcbadpdjqnGl: orq %r8,%r8 je .L_after_reduction_nFyvcbadpdjqnGl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nFyvcbadpdjqnGl: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_8_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_qtzDbmlGiqglyFC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_qtzDbmlGiqglyFC .L_16_blocks_overflow_qtzDbmlGiqglyFC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_qtzDbmlGiqglyFC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 
128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jhfdGzoqFGvFnBz subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jhfdGzoqFGvFnBz 
.L_small_initial_partial_block_jhfdGzoqFGvFnBz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jhfdGzoqFGvFnBz: orq %r8,%r8 je .L_after_reduction_jhfdGzoqFGvFnBz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jhfdGzoqFGvFnBz: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_9_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_wmBlfbGwbkoxgju vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_wmBlfbGwbkoxgju .L_16_blocks_overflow_wmBlfbGwbkoxgju: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_wmBlfbGwbkoxgju: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 
vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_baszqDAmduvhiiE subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_baszqDAmduvhiiE .L_small_initial_partial_block_baszqDAmduvhiiE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_baszqDAmduvhiiE: orq %r8,%r8 je .L_after_reduction_baszqDAmduvhiiE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_baszqDAmduvhiiE: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_10_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_stwxpAgbfshrvAC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_stwxpAgbfshrvAC .L_16_blocks_overflow_stwxpAgbfshrvAC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_stwxpAgbfshrvAC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_exAeuCGujFxiqAh subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_exAeuCGujFxiqAh .L_small_initial_partial_block_exAeuCGujFxiqAh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_exAeuCGujFxiqAh: orq %r8,%r8 je .L_after_reduction_exAeuCGujFxiqAh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_exAeuCGujFxiqAh: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_11_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_AxBbgslpvfAEaln vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_AxBbgslpvfAEaln .L_16_blocks_overflow_AxBbgslpvfAEaln: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_AxBbgslpvfAEaln: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DbcpAfrkzFcgwwp subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DbcpAfrkzFcgwwp .L_small_initial_partial_block_DbcpAfrkzFcgwwp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 
vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DbcpAfrkzFcgwwp: orq %r8,%r8 je .L_after_reduction_DbcpAfrkzFcgwwp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DbcpAfrkzFcgwwp: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_12_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_smrhssarGEoyasa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_smrhssarGEoyasa .L_16_blocks_overflow_smrhssarGEoyasa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_smrhssarGEoyasa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq 
$0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rouvbBEfwtDrsEg subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rouvbBEfwtDrsEg .L_small_initial_partial_block_rouvbBEfwtDrsEg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rouvbBEfwtDrsEg: orq %r8,%r8 je .L_after_reduction_rouvbBEfwtDrsEg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rouvbBEfwtDrsEg: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_13_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_DrfxGvBzxdbnqak vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_DrfxGvBzxdbnqak .L_16_blocks_overflow_DrfxGvBzxdbnqak: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_DrfxGvBzxdbnqak: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wcayAkkuiehcgnC subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wcayAkkuiehcgnC .L_small_initial_partial_block_wcayAkkuiehcgnC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wcayAkkuiehcgnC: orq %r8,%r8 je .L_after_reduction_wcayAkkuiehcgnC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wcayAkkuiehcgnC: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_14_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_kAcyvjjAkbnGGoE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_kAcyvjjAkbnGGoE .L_16_blocks_overflow_kAcyvjjAkbnGGoE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_kAcyvjjAkbnGGoE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lECstFkGozakhDE subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lECstFkGozakhDE .L_small_initial_partial_block_lECstFkGozakhDE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lECstFkGozakhDE: orq %r8,%r8 je .L_after_reduction_lECstFkGozakhDE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lECstFkGozakhDE: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_15_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_uvsntmjBtmwoAgA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_uvsntmjBtmwoAgA .L_16_blocks_overflow_uvsntmjBtmwoAgA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_uvsntmjBtmwoAgA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gFfyGkDCahpvfAe subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gFfyGkDCahpvfAe .L_small_initial_partial_block_gFfyGkDCahpvfAe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gFfyGkDCahpvfAe: orq %r8,%r8 je .L_after_reduction_gFfyGkDCahpvfAe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gFfyGkDCahpvfAe: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_16_EnDAnndDABDpwrg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_jwffjzkjrdbGmqd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jwffjzkjrdbGmqd .L_16_blocks_overflow_jwffjzkjrdbGmqd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jwffjzkjrdbGmqd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq 
%r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ccvdpppmDomgiCD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ccvdpppmDomgiCD: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ccvdpppmDomgiCD: jmp .L_last_blocks_done_EnDAnndDABDpwrg .L_last_num_blocks_is_0_EnDAnndDABDpwrg: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 
$1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_EnDAnndDABDpwrg: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_yiifChpfBbxhAhe .L_encrypt_32_blocks_yiifChpfBbxhAhe: cmpb $240,%r15b jae .L_16_blocks_overflow_igclhxhftlBGfml vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_igclhxhftlBGfml .L_16_blocks_overflow_igclhxhftlBGfml: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_igclhxhftlBGfml: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 
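/*
 * 32-block path, first 16-block stretch (continued): the remaining AES
 * rounds for the counter vectors %zmm0,%zmm3,%zmm4,%zmm5 use round keys
 * broadcast from the key schedule at (%rdi); the keystream is then XORed
 * with four 64-byte blocks of data and the byte-reflected input blocks
 * are buffered on the stack for the next GHASH pass.
 */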
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_hgchDvhDwhDhkhj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hgchDvhDwhDhkhj .L_16_blocks_overflow_hgchDvhDwhDhkhj: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hgchDvhDwhDhkhj: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 
112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl 
$4,%r10d je .L_last_num_blocks_is_0_mzebEnFmrFgqunA cmpl $8,%r10d je .L_last_num_blocks_is_8_mzebEnFmrFgqunA jb .L_last_num_blocks_is_7_1_mzebEnFmrFgqunA cmpl $12,%r10d je .L_last_num_blocks_is_12_mzebEnFmrFgqunA jb .L_last_num_blocks_is_11_9_mzebEnFmrFgqunA cmpl $15,%r10d je .L_last_num_blocks_is_15_mzebEnFmrFgqunA ja .L_last_num_blocks_is_16_mzebEnFmrFgqunA cmpl $14,%r10d je .L_last_num_blocks_is_14_mzebEnFmrFgqunA jmp .L_last_num_blocks_is_13_mzebEnFmrFgqunA .L_last_num_blocks_is_11_9_mzebEnFmrFgqunA: cmpl $10,%r10d je .L_last_num_blocks_is_10_mzebEnFmrFgqunA ja .L_last_num_blocks_is_11_mzebEnFmrFgqunA jmp .L_last_num_blocks_is_9_mzebEnFmrFgqunA .L_last_num_blocks_is_7_1_mzebEnFmrFgqunA: cmpl $4,%r10d je .L_last_num_blocks_is_4_mzebEnFmrFgqunA jb .L_last_num_blocks_is_3_1_mzebEnFmrFgqunA cmpl $6,%r10d ja .L_last_num_blocks_is_7_mzebEnFmrFgqunA je .L_last_num_blocks_is_6_mzebEnFmrFgqunA jmp .L_last_num_blocks_is_5_mzebEnFmrFgqunA .L_last_num_blocks_is_3_1_mzebEnFmrFgqunA: cmpl $2,%r10d ja .L_last_num_blocks_is_3_mzebEnFmrFgqunA je .L_last_num_blocks_is_2_mzebEnFmrFgqunA .L_last_num_blocks_is_1_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_nGCoqEFBGnmxbxd vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_nGCoqEFBGnmxbxd .L_16_blocks_overflow_nGCoqEFBGnmxbxd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_nGCoqEFBGnmxbxd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 
%xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_pteDFgEDjspDekt subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pteDFgEDjspDekt .L_small_initial_partial_block_pteDFgEDjspDekt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_pteDFgEDjspDekt .L_small_initial_compute_done_pteDFgEDjspDekt: .L_after_reduction_pteDFgEDjspDekt: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_2_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_BnoeeeAuxpuGrCd vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_BnoeeeAuxpuGrCd .L_16_blocks_overflow_BnoeeeAuxpuGrCd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_BnoeeeAuxpuGrCd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
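/*
 * 2-block tail (continued): AES rounds for the counter pair in %ymm0 run
 * interleaved with GHASH partial products over the previously buffered
 * blocks; the trailing partial data is loaded and stored with a masked
 * 32-byte access through %k1.
 */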
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pGCaGvdapDriFwq subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pGCaGvdapDriFwq .L_small_initial_partial_block_pGCaGvdapDriFwq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pGCaGvdapDriFwq: orq %r8,%r8 je .L_after_reduction_pGCaGvdapDriFwq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pGCaGvdapDriFwq: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_3_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_rpvBmmdleounkfg vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_rpvBmmdleounkfg .L_16_blocks_overflow_rpvBmmdleounkfg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb 
%zmm29,%zmm0,%zmm0 .L_16_blocks_ok_rpvBmmdleounkfg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EDfFbxCoAeBbBmG subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EDfFbxCoAeBbBmG .L_small_initial_partial_block_EDfFbxCoAeBbBmG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 
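/*
 * 3-block tail (continued): the accumulated GHASH partial products are
 * folded down to 128 bits and reduced with the POLY2 constant, leaving
 * the running hash in %xmm14.
 */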
vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EDfFbxCoAeBbBmG: orq %r8,%r8 je .L_after_reduction_EDfFbxCoAeBbBmG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EDfFbxCoAeBbBmG: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_4_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_eejufxFfpkhainn vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_eejufxFfpkhainn .L_16_blocks_overflow_eejufxFfpkhainn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_eejufxFfpkhainn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_rtqFkraGudeyaFm subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rtqFkraGudeyaFm .L_small_initial_partial_block_rtqFkraGudeyaFm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rtqFkraGudeyaFm: orq %r8,%r8 je .L_after_reduction_rtqFkraGudeyaFm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rtqFkraGudeyaFm: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_5_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_bgofyFpgEnsntBw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_bgofyFpgEnsntBw .L_16_blocks_overflow_bgofyFpgEnsntBw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_bgofyFpgEnsntBw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 
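/*
 * 5-block tail (continued): AES rounds for four blocks in %zmm0 plus one
 * block in %xmm3 proceed alongside the GHASH partial products of the
 * buffered blocks; the fifth block uses a masked 16-byte load/store via %k1.
 */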
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uCfkbGGrphGcGba subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uCfkbGGrphGcGba .L_small_initial_partial_block_uCfkbGGrphGcGba: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uCfkbGGrphGcGba: orq %r8,%r8 je .L_after_reduction_uCfkbGGrphGcGba vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uCfkbGGrphGcGba: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_6_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_GvptlszrGgmFuve vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_GvptlszrGgmFuve .L_16_blocks_overflow_GvptlszrGgmFuve: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_GvptlszrGgmFuve: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 
vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oFAlvAhpbuuoctp subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oFAlvAhpbuuoctp .L_small_initial_partial_block_oFAlvAhpbuuoctp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oFAlvAhpbuuoctp: orq %r8,%r8 je .L_after_reduction_oFAlvAhpbuuoctp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oFAlvAhpbuuoctp: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_7_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_DxbjcygrgxudEjb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_DxbjcygrgxudEjb .L_16_blocks_overflow_DxbjcygrgxudEjb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_DxbjcygrgxudEjb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 
vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xFeGbEcEyBujjsd subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xFeGbEcEyBujjsd .L_small_initial_partial_block_xFeGbEcEyBujjsd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xFeGbEcEyBujjsd: orq %r8,%r8 je .L_after_reduction_xFeGbEcEyBujjsd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xFeGbEcEyBujjsd: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_8_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_njjFmdkzFAzEDDa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_njjFmdkzFAzEDDa .L_16_blocks_overflow_njjFmdkzFAzEDDa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_njjFmdkzFAzEDDa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ozrwtEFqpzbbFif subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ozrwtEFqpzbbFif .L_small_initial_partial_block_ozrwtEFqpzbbFif: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 
vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ozrwtEFqpzbbFif: orq %r8,%r8 je .L_after_reduction_ozrwtEFqpzbbFif vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ozrwtEFqpzbbFif: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_9_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_tzqaclAtnqeEABy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_tzqaclAtnqeEABy .L_16_blocks_overflow_tzqaclAtnqeEABy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_tzqaclAtnqeEABy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_akxrmDCvAwmtoBq subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_akxrmDCvAwmtoBq .L_small_initial_partial_block_akxrmDCvAwmtoBq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_akxrmDCvAwmtoBq: orq %r8,%r8 je 
.L_after_reduction_akxrmDCvAwmtoBq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_akxrmDCvAwmtoBq: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_10_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_mdrttBDhusakuks vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_mdrttBDhusakuks .L_16_blocks_overflow_mdrttBDhusakuks: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_mdrttBDhusakuks: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 
vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iAgGclofsEyxAFd subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iAgGclofsEyxAFd .L_small_initial_partial_block_iAgGclofsEyxAFd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iAgGclofsEyxAFd: orq %r8,%r8 je .L_after_reduction_iAgGclofsEyxAFd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iAgGclofsEyxAFd: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_11_mzebEnFmrFgqunA: 
leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_dngFDcgnxjanBrr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_dngFDcgnxjanBrr .L_16_blocks_overflow_dngFDcgnxjanBrr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_dngFDcgnxjanBrr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 
%zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_okvBnGbFccGxioi subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_okvBnGbFccGxioi .L_small_initial_partial_block_okvBnGbFccGxioi: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_okvBnGbFccGxioi: orq %r8,%r8 je .L_after_reduction_okvBnGbFccGxioi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_okvBnGbFccGxioi: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_12_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae 
.L_16_blocks_overflow_aubdtmlCEjgrkqC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_aubdtmlCEjgrkqC .L_16_blocks_overflow_aubdtmlCEjgrkqC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_aubdtmlCEjgrkqC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 
vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fAvjEssplkpFDzu subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fAvjEssplkpFDzu .L_small_initial_partial_block_fAvjEssplkpFDzu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fAvjEssplkpFDzu: orq %r8,%r8 je .L_after_reduction_fAvjEssplkpFDzu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fAvjEssplkpFDzu: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_13_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_tgGfmxsfvvfjlut vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 
jmp .L_16_blocks_ok_tgGfmxsfvvfjlut .L_16_blocks_overflow_tgGfmxsfvvfjlut: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_tgGfmxsfvvfjlut: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dGgFeCerpjagCtb subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dGgFeCerpjagCtb .L_small_initial_partial_block_dGgFeCerpjagCtb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dGgFeCerpjagCtb: orq %r8,%r8 je .L_after_reduction_dGgFeCerpjagCtb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dGgFeCerpjagCtb: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_14_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_GjeuEqvcyhCdAlB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_GjeuEqvcyhCdAlB .L_16_blocks_overflow_GjeuEqvcyhCdAlB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_GjeuEqvcyhCdAlB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CbnaspueplphnCn subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CbnaspueplphnCn .L_small_initial_partial_block_CbnaspueplphnCn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 
336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CbnaspueplphnCn: orq %r8,%r8 je .L_after_reduction_CbnaspueplphnCn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CbnaspueplphnCn: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_15_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_vduCxcjofxGqAou vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vduCxcjofxGqAou .L_16_blocks_overflow_vduCxcjofxGqAou: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vduCxcjofxGqAou: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 
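/* 15-block tail: the last 64-byte group is fetched with a zero-masking load ({%k1}{z})
   so bytes beyond the message end read as zero before the keystream XOR and the
   subsequent GHASH update. */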
vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xdoEhGjsfscahrp subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 
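/* GHASH reduction pattern used throughout this file: the vpclmulqdq/vpslldq/vpsrldq
   pairs against POLY2 fold the 256-bit carry-less product back to 128 bits, and
   vpternlogq with immediate 0x96 acts as a three-way XOR. */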
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xdoEhGjsfscahrp .L_small_initial_partial_block_xdoEhGjsfscahrp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xdoEhGjsfscahrp: orq %r8,%r8 je .L_after_reduction_xdoEhGjsfscahrp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xdoEhGjsfscahrp: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_16_mzebEnFmrFgqunA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_skEyjqiskGfxdvC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_skEyjqiskGfxdvC .L_16_blocks_overflow_skEyjqiskGfxdvC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_skEyjqiskGfxdvC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_dxixdfuDqivveAt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 
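/* 16-block tail with a possibly partial final block: only the 15 complete blocks are
   hashed here (the lowest hash-key powers are packed from 304(%rsi)/336(%rsi) into one
   zmm); the 16th block, byte-reflected in %xmm7, is folded in after the reduction. */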
vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dxixdfuDqivveAt: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dxixdfuDqivveAt: jmp .L_last_blocks_done_mzebEnFmrFgqunA .L_last_num_blocks_is_0_mzebEnFmrFgqunA: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_mzebEnFmrFgqunA: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_yiifChpfBbxhAhe .L_encrypt_16_blocks_yiifChpfBbxhAhe: cmpb $240,%r15b jae .L_16_blocks_overflow_lGoEsFGcBhBnEgo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_lGoEsFGcBhBnEgo .L_16_blocks_overflow_lGoEsFGcBhBnEgo: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_lGoEsFGcBhBnEgo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 
768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 
256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_GGlifssooGvFomC cmpl $8,%r10d je .L_last_num_blocks_is_8_GGlifssooGvFomC jb .L_last_num_blocks_is_7_1_GGlifssooGvFomC cmpl $12,%r10d je .L_last_num_blocks_is_12_GGlifssooGvFomC jb .L_last_num_blocks_is_11_9_GGlifssooGvFomC cmpl $15,%r10d je .L_last_num_blocks_is_15_GGlifssooGvFomC ja .L_last_num_blocks_is_16_GGlifssooGvFomC cmpl $14,%r10d je .L_last_num_blocks_is_14_GGlifssooGvFomC jmp .L_last_num_blocks_is_13_GGlifssooGvFomC .L_last_num_blocks_is_11_9_GGlifssooGvFomC: cmpl $10,%r10d je .L_last_num_blocks_is_10_GGlifssooGvFomC ja .L_last_num_blocks_is_11_GGlifssooGvFomC jmp .L_last_num_blocks_is_9_GGlifssooGvFomC .L_last_num_blocks_is_7_1_GGlifssooGvFomC: cmpl $4,%r10d je .L_last_num_blocks_is_4_GGlifssooGvFomC jb .L_last_num_blocks_is_3_1_GGlifssooGvFomC cmpl $6,%r10d ja .L_last_num_blocks_is_7_GGlifssooGvFomC je .L_last_num_blocks_is_6_GGlifssooGvFomC jmp .L_last_num_blocks_is_5_GGlifssooGvFomC .L_last_num_blocks_is_3_1_GGlifssooGvFomC: cmpl $2,%r10d ja .L_last_num_blocks_is_3_GGlifssooGvFomC je .L_last_num_blocks_is_2_GGlifssooGvFomC .L_last_num_blocks_is_1_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_DFdkfCEpyEuzGts vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_DFdkfCEpyEuzGts .L_16_blocks_overflow_DFdkfCEpyEuzGts: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_DFdkfCEpyEuzGts: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
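/* The compare ladder above dispatches on how many blocks remain (0-16) after the
   256-byte main-loop pass; this path handles a single remaining block, so the AES
   rounds below run at xmm width only while the GHASH of the blocks saved at
   1280(%rsp) onward is interleaved. */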
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_znzDmxCrzeqhmtt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_znzDmxCrzeqhmtt .L_small_initial_partial_block_znzDmxCrzeqhmtt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_znzDmxCrzeqhmtt .L_small_initial_compute_done_znzDmxCrzeqhmtt: .L_after_reduction_znzDmxCrzeqhmtt: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_2_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_fxAkfvCdnqqGArm vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_fxAkfvCdnqqGArm .L_16_blocks_overflow_fxAkfvCdnqqGArm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_fxAkfvCdnqqGArm: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 
1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kgAaABygmxmrDhD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kgAaABygmxmrDhD .L_small_initial_partial_block_kgAaABygmxmrDhD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kgAaABygmxmrDhD: orq %r8,%r8 je .L_after_reduction_kgAaABygmxmrDhD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kgAaABygmxmrDhD: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_3_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_DnqopufcDlfooBF vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_DnqopufcDlfooBF .L_16_blocks_overflow_DnqopufcDlfooBF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_DnqopufcDlfooBF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 
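/* 3-block tail of the main-loop epilogue: the reduction constant POLY2 is loaded early
   so the final GHASH fold can be interleaved with the remaining AES rounds of the
   counter blocks. */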
vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qgbxmvAdpcwjFGD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qgbxmvAdpcwjFGD .L_small_initial_partial_block_qgbxmvAdpcwjFGD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qgbxmvAdpcwjFGD: orq %r8,%r8 je .L_after_reduction_qgbxmvAdpcwjFGD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qgbxmvAdpcwjFGD: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_4_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_zzorvqhpvdBckcq vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_zzorvqhpvdBckcq .L_16_blocks_overflow_zzorvqhpvdBckcq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_zzorvqhpvdBckcq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 
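/* 4-block tail: %zmm8 is reloaded with the byte-reflected blocks the preceding
   16-block pass parked at 1280(%rsp); 512(%rsp) onward appear to hold hash-key powers
   spilled during setup, so their GHASH runs alongside this tail's encryption. */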
vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_giCxqwgmxrChxdc subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_giCxqwgmxrChxdc .L_small_initial_partial_block_giCxqwgmxrChxdc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_giCxqwgmxrChxdc: orq %r8,%r8 je .L_after_reduction_giCxqwgmxrChxdc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_giCxqwgmxrChxdc: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_5_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_qzjnvgqjjxsfmEr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_qzjnvgqjjxsfmEr .L_16_blocks_overflow_qzjnvgqjjxsfmEr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_qzjnvgqjjxsfmEr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 
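/* 5-block tail: four blocks are encrypted in %zmm0 and the odd block in %xmm3, so each
   AES round key is applied once at zmm width and once at xmm width. */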
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xoEftvygjvpovck subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xoEftvygjvpovck .L_small_initial_partial_block_xoEftvygjvpovck: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
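/* Partial-block bookkeeping shared by these tail paths: the leftover byte count is
   written to (%rdx), the last output block (%xmm11) is stashed at 16(%rsi), apparently
   so a later call can complete it, and the byte-reflected remainder waits in %xmm7
   until the post-reduction XOR. */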
vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xoEftvygjvpovck: orq %r8,%r8 je .L_after_reduction_xoEftvygjvpovck vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xoEftvygjvpovck: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_6_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_mvFwizCezuedAbr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_mvFwizCezuedAbr .L_16_blocks_overflow_mvFwizCezuedAbr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_mvFwizCezuedAbr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FDuhyDmhetmzsvq subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FDuhyDmhetmzsvq .L_small_initial_partial_block_FDuhyDmhetmzsvq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FDuhyDmhetmzsvq: orq %r8,%r8 je .L_after_reduction_FDuhyDmhetmzsvq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FDuhyDmhetmzsvq: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_7_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_owtBaGpzgzgcxrC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_owtBaGpzgzgcxrC 
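/* Counter-overflow fallback for the 7-block tail: when the compare above indicates the
   low counter byte would wrap, the blocks are byte-swapped, incremented with the
   ddq_add_1234/ddq_add_4444 constants, and swapped back before encryption. */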
.L_16_blocks_overflow_owtBaGpzgzgcxrC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_owtBaGpzgzgcxrC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 
jl .L_small_initial_partial_block_DncaxytjCyxiknt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DncaxytjCyxiknt .L_small_initial_partial_block_DncaxytjCyxiknt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DncaxytjCyxiknt: orq %r8,%r8 je .L_after_reduction_DncaxytjCyxiknt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DncaxytjCyxiknt: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_8_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_dAhdphrDhhiFfvd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_dAhdphrDhhiFfvd .L_16_blocks_overflow_dAhdphrDhhiFfvd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_dAhdphrDhhiFfvd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CnEvizjBlzFFnif subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq 
%zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CnEvizjBlzFFnif .L_small_initial_partial_block_CnEvizjBlzFFnif: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CnEvizjBlzFFnif: orq %r8,%r8 je .L_after_reduction_CnEvizjBlzFFnif vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CnEvizjBlzFFnif: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_9_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_eaicByEvunpebxo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_eaicByEvunpebxo .L_16_blocks_overflow_eaicByEvunpebxo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_eaicByEvunpebxo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 
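/* Annotation (not in the generated output): 9-block tail. AES rounds for the
 * three counter vectors (%zmm0, %zmm3, %xmm4) are interleaved with vpclmulqdq
 * GHASH partial products of earlier blocks, using hash-key powers and saved
 * block data that appear to be cached in the stack frame. */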
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gfgCplcDGBrovbz subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq 
$0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gfgCplcDGBrovbz .L_small_initial_partial_block_gfgCplcDGBrovbz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gfgCplcDGBrovbz: orq %r8,%r8 je .L_after_reduction_gfgCplcDGBrovbz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gfgCplcDGBrovbz: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_10_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_bfFejorcehrytqq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_bfFejorcehrytqq .L_16_blocks_overflow_bfFejorcehrytqq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_bfFejorcehrytqq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 
vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ebiAndfrelejgeD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq 
%zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ebiAndfrelejgeD .L_small_initial_partial_block_ebiAndfrelejgeD: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ebiAndfrelejgeD: orq %r8,%r8 je .L_after_reduction_ebiAndfrelejgeD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ebiAndfrelejgeD: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_11_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_nsakvpcBnizduGq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_nsakvpcBnizduGq .L_16_blocks_overflow_nsakvpcBnizduGq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_nsakvpcBnizduGq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FeAoudrbheqBGiy subq $16,%r8 movq $0,(%rdx) vpxorq 
%zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FeAoudrbheqBGiy .L_small_initial_partial_block_FeAoudrbheqBGiy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FeAoudrbheqBGiy: orq %r8,%r8 je .L_after_reduction_FeAoudrbheqBGiy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FeAoudrbheqBGiy: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_12_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_bwFzciofFgjcilw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_bwFzciofFgjcilw .L_16_blocks_overflow_bwFzciofFgjcilw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd 
%zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_bwFzciofFgjcilw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cfkroClFdpzvhum subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cfkroClFdpzvhum .L_small_initial_partial_block_cfkroClFdpzvhum: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cfkroClFdpzvhum: orq %r8,%r8 je .L_after_reduction_cfkroClFdpzvhum vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cfkroClFdpzvhum: jmp .L_last_blocks_done_GGlifssooGvFomC 
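/* Annotation (not in the generated output): the tail branches below, for 13
 * or more remaining blocks, follow the same pattern as the ones above but
 * carry a fourth counter vector (%xmm5/%ymm5/%zmm5) and a fourth,
 * mask-controlled load/store covering the bytes at offsets 192 and up within
 * the remaining chunk. */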
.L_last_num_blocks_is_13_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_wabAfqhkitemmDb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_wabAfqhkitemmDb .L_16_blocks_overflow_wabAfqhkitemmDb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_wabAfqhkitemmDb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sdmohCiFjxvtkha subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sdmohCiFjxvtkha .L_small_initial_partial_block_sdmohCiFjxvtkha: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sdmohCiFjxvtkha: orq %r8,%r8 je .L_after_reduction_sdmohCiFjxvtkha vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sdmohCiFjxvtkha: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_14_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_xpqoqezlFcomfjA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_xpqoqezlFcomfjA .L_16_blocks_overflow_xpqoqezlFcomfjA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_xpqoqezlFcomfjA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 
128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fexjdoDflollEzw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq 
%zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fexjdoDflollEzw .L_small_initial_partial_block_fexjdoDflollEzw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fexjdoDflollEzw: orq %r8,%r8 je .L_after_reduction_fexjdoDflollEzw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fexjdoDflollEzw: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_15_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_iupvxgCFjryaArw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_iupvxgCFjryaArw .L_16_blocks_overflow_iupvxgCFjryaArw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_iupvxgCFjryaArw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 
vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 
vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lxborjzgtwFghrg subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lxborjzgtwFghrg .L_small_initial_partial_block_lxborjzgtwFghrg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lxborjzgtwFghrg: orq %r8,%r8 je .L_after_reduction_lxborjzgtwFghrg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lxborjzgtwFghrg: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_16_GGlifssooGvFomC: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_moDvkAftCFCxmvo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_moDvkAftCFCxmvo .L_16_blocks_overflow_moDvkAftCFCxmvo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_moDvkAftCFCxmvo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_xrrskpkhizncrkw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xrrskpkhizncrkw: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xrrskpkhizncrkw: jmp .L_last_blocks_done_GGlifssooGvFomC .L_last_num_blocks_is_0_GGlifssooGvFomC: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_GGlifssooGvFomC: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_yiifChpfBbxhAhe .L_message_below_32_blocks_yiifChpfBbxhAhe: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_ixpbnbdqqmnximo vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq 
%zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_ixpbnbdqqmnximo: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_GEDbrBwahgCtBua cmpl $8,%r10d je .L_last_num_blocks_is_8_GEDbrBwahgCtBua jb .L_last_num_blocks_is_7_1_GEDbrBwahgCtBua cmpl $12,%r10d je .L_last_num_blocks_is_12_GEDbrBwahgCtBua jb .L_last_num_blocks_is_11_9_GEDbrBwahgCtBua cmpl $15,%r10d je .L_last_num_blocks_is_15_GEDbrBwahgCtBua ja .L_last_num_blocks_is_16_GEDbrBwahgCtBua cmpl $14,%r10d je .L_last_num_blocks_is_14_GEDbrBwahgCtBua jmp .L_last_num_blocks_is_13_GEDbrBwahgCtBua .L_last_num_blocks_is_11_9_GEDbrBwahgCtBua: cmpl $10,%r10d je .L_last_num_blocks_is_10_GEDbrBwahgCtBua ja .L_last_num_blocks_is_11_GEDbrBwahgCtBua jmp .L_last_num_blocks_is_9_GEDbrBwahgCtBua .L_last_num_blocks_is_7_1_GEDbrBwahgCtBua: cmpl $4,%r10d je .L_last_num_blocks_is_4_GEDbrBwahgCtBua jb .L_last_num_blocks_is_3_1_GEDbrBwahgCtBua cmpl $6,%r10d ja .L_last_num_blocks_is_7_GEDbrBwahgCtBua je .L_last_num_blocks_is_6_GEDbrBwahgCtBua jmp .L_last_num_blocks_is_5_GEDbrBwahgCtBua .L_last_num_blocks_is_3_1_GEDbrBwahgCtBua: cmpl $2,%r10d ja .L_last_num_blocks_is_3_GEDbrBwahgCtBua je .L_last_num_blocks_is_2_GEDbrBwahgCtBua .L_last_num_blocks_is_1_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_uopvqADFnvomDpc vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_uopvqADFnvomDpc .L_16_blocks_overflow_uopvqADFnvomDpc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_uopvqADFnvomDpc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_DnfzexoyiBDakur subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DnfzexoyiBDakur .L_small_initial_partial_block_DnfzexoyiBDakur: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_DnfzexoyiBDakur .L_small_initial_compute_done_DnfzexoyiBDakur: .L_after_reduction_DnfzexoyiBDakur: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_2_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_frftcwjeGlwitcu vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_frftcwjeGlwitcu .L_16_blocks_overflow_frftcwjeGlwitcu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_frftcwjeGlwitcu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 
768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ebldtywbExmpuki subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ebldtywbExmpuki .L_small_initial_partial_block_ebldtywbExmpuki: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 
vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ebldtywbExmpuki: orq %r8,%r8 je .L_after_reduction_ebldtywbExmpuki vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ebldtywbExmpuki: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_3_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_hAiudycBxwjzccs vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_hAiudycBxwjzccs .L_16_blocks_overflow_hAiudycBxwjzccs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_hAiudycBxwjzccs: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gkjuFBcoGtpvwjC subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gkjuFBcoGtpvwjC .L_small_initial_partial_block_gkjuFBcoGtpvwjC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gkjuFBcoGtpvwjC: orq %r8,%r8 je .L_after_reduction_gkjuFBcoGtpvwjC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gkjuFBcoGtpvwjC: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_4_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_oahqGxwjdGuFmgl vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_oahqGxwjdGuFmgl .L_16_blocks_overflow_oahqGxwjdGuFmgl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_oahqGxwjdGuFmgl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eiywasarDnqsmGr subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eiywasarDnqsmGr .L_small_initial_partial_block_eiywasarDnqsmGr: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eiywasarDnqsmGr: orq %r8,%r8 je .L_after_reduction_eiywasarDnqsmGr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eiywasarDnqsmGr: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_5_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_hnCCvmCdnDGyqwm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp 
.L_16_blocks_ok_hnCCvmCdnDGyqwm .L_16_blocks_overflow_hnCCvmCdnDGyqwm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_hnCCvmCdnDGyqwm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ClsDvmjDyaivejA subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ClsDvmjDyaivejA .L_small_initial_partial_block_ClsDvmjDyaivejA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ClsDvmjDyaivejA: orq %r8,%r8 je .L_after_reduction_ClsDvmjDyaivejA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ClsDvmjDyaivejA: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_6_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_wuftgpncuosGzzy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_wuftgpncuosGzzy .L_16_blocks_overflow_wuftgpncuosGzzy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_wuftgpncuosGzzy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zFcpqFaCfaxEfGi subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zFcpqFaCfaxEfGi .L_small_initial_partial_block_zFcpqFaCfaxEfGi: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 
vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zFcpqFaCfaxEfGi: orq %r8,%r8 je .L_after_reduction_zFcpqFaCfaxEfGi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zFcpqFaCfaxEfGi: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_7_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_klwFEoGBGuBizdw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_klwFEoGBGuBizdw .L_16_blocks_overflow_klwFEoGBGuBizdw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_klwFEoGBGuBizdw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xbzdhFqEauEAyBq subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xbzdhFqEauEAyBq .L_small_initial_partial_block_xbzdhFqEauEAyBq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xbzdhFqEauEAyBq: orq %r8,%r8 je .L_after_reduction_xbzdhFqEauEAyBq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xbzdhFqEauEAyBq: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_8_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_jAucrepCBmxevpC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_jAucrepCBmxevpC .L_16_blocks_overflow_jAucrepCBmxevpC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_jAucrepCBmxevpC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xBnzffrFrcfhxcA subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xBnzffrFrcfhxcA .L_small_initial_partial_block_xBnzffrFrcfhxcA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xBnzffrFrcfhxcA: orq %r8,%r8 je .L_after_reduction_xBnzffrFrcfhxcA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xBnzffrFrcfhxcA: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_9_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_lnAxGywxkpnspqj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_lnAxGywxkpnspqj .L_16_blocks_overflow_lnAxGywxkpnspqj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_lnAxGywxkpnspqj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 
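/* 9-block tail: AES rounds on the fresh counter blocks are interleaved with
   vpclmulqdq partial products of the buffered ciphertext against the cached
   hash-key powers; vpternlogq $0x96 acts as a three-way XOR folding the
   high, low and middle products into the GHASH accumulators. */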
vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AFvqyugwjoGBwEa subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AFvqyugwjoGBwEa .L_small_initial_partial_block_AFvqyugwjoGBwEa: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
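/* Each 128-bit block is multiplied by its hash-key power with four
   vpclmulqdq operations (imm 0x11 = high, 0x00 = low, 0x01/0x10 = middle). */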
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AFvqyugwjoGBwEa: orq %r8,%r8 je .L_after_reduction_AFvqyugwjoGBwEa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AFvqyugwjoGBwEa: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_10_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_ffDgumCtogFyFDv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_ffDgumCtogFyFDv .L_16_blocks_overflow_ffDgumCtogFyFDv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_ffDgumCtogFyFDv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_erArFgBvhusaEfz subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_erArFgBvhusaEfz .L_small_initial_partial_block_erArFgBvhusaEfz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq 
$0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_erArFgBvhusaEfz: orq %r8,%r8 je .L_after_reduction_erArFgBvhusaEfz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_erArFgBvhusaEfz: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_11_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_bFwwBhxumkFGgCj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_bFwwBhxumkFGgCj .L_16_blocks_overflow_bFwwBhxumkFGgCj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_bFwwBhxumkFGgCj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GsrdkhxzEjDjspu subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GsrdkhxzEjDjspu .L_small_initial_partial_block_GsrdkhxzEjDjspu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GsrdkhxzEjDjspu: orq %r8,%r8 je .L_after_reduction_GsrdkhxzEjDjspu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GsrdkhxzEjDjspu: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_12_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_EhylpkcoptuvDCF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_EhylpkcoptuvDCF .L_16_blocks_overflow_EhylpkcoptuvDCF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_EhylpkcoptuvDCF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rxjldaleyvljAtn subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rxjldaleyvljAtn .L_small_initial_partial_block_rxjldaleyvljAtn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rxjldaleyvljAtn: orq %r8,%r8 je .L_after_reduction_rxjldaleyvljAtn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rxjldaleyvljAtn: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_13_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_fbDDAjuqhDzbgcz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_fbDDAjuqhDzbgcz .L_16_blocks_overflow_fbDDAjuqhDzbgcz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_fbDDAjuqhDzbgcz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rvBgbcAEiGvppxE subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rvBgbcAEiGvppxE .L_small_initial_partial_block_rvBgbcAEiGvppxE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 
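/* Partial final block: the leftover byte count is kept in (%rdx), the last
   output block is saved at 16(%rsi), and GHASH covers one block fewer, so
   the hash-key powers are taken one table slot higher. */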
vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rvBgbcAEiGvppxE: orq %r8,%r8 je .L_after_reduction_rvBgbcAEiGvppxE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rvBgbcAEiGvppxE: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_14_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_gqnBxnvCCiecpBb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_gqnBxnvCCiecpBb .L_16_blocks_overflow_gqnBxnvCCiecpBb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_gqnBxnvCCiecpBb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eqvhEpqoCboGBGs subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 
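/* The 512-bit GHASH accumulators have been folded down to 128 bits; the code
   that follows performs the final reduction modulo the GHASH polynomial using
   the precomputed POLY2 constant. */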
vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eqvhEpqoCboGBGs .L_small_initial_partial_block_eqvhEpqoCboGBGs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eqvhEpqoCboGBGs: orq %r8,%r8 je .L_after_reduction_eqvhEpqoCboGBGs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eqvhEpqoCboGBGs: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_15_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_dnxqlgAbmkEzAAl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_dnxqlgAbmkEzAAl .L_16_blocks_overflow_dnxqlgAbmkEzAAl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_dnxqlgAbmkEzAAl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vubecvzrvvmvkjn subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 
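/* 15-block GHASH: the byte-reflected ciphertext blocks in %zmm17, %zmm19,
   %zmm20 and %zmm21 are multiplied by the matching hash-key powers and the
   partial products are combined with three-way XORs ahead of one reduction. */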
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vubecvzrvvmvkjn .L_small_initial_partial_block_vubecvzrvvmvkjn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vubecvzrvvmvkjn: orq %r8,%r8 je .L_after_reduction_vubecvzrvvmvkjn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vubecvzrvvmvkjn: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_16_GEDbrBwahgCtBua: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_CvkndtfiFrebkyC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_CvkndtfiFrebkyC .L_16_blocks_overflow_CvkndtfiFrebkyC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 
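/* Counter-overflow path for the 16-block tail: the counter block is
   byte-reflected with %zmm29 so the ddq_add_1234 / ddq_add_4444 increments
   carry correctly across byte boundaries, then reflected back before the
   AES rounds. */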
vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_CvkndtfiFrebkyC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 
%zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_lvDgrdjdyCeaixF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lvDgrdjdyCeaixF: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lvDgrdjdyCeaixF: jmp .L_last_blocks_done_GEDbrBwahgCtBua .L_last_num_blocks_is_0_GEDbrBwahgCtBua: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 
$1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_GEDbrBwahgCtBua: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_yiifChpfBbxhAhe .L_message_below_equal_16_blocks_yiifChpfBbxhAhe: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_mplqBbEupjaGmpE jl .L_small_initial_num_blocks_is_7_1_mplqBbEupjaGmpE cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_mplqBbEupjaGmpE jl .L_small_initial_num_blocks_is_11_9_mplqBbEupjaGmpE cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_mplqBbEupjaGmpE cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_mplqBbEupjaGmpE cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_mplqBbEupjaGmpE jmp .L_small_initial_num_blocks_is_13_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_11_9_mplqBbEupjaGmpE: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_mplqBbEupjaGmpE cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_mplqBbEupjaGmpE jmp .L_small_initial_num_blocks_is_9_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_7_1_mplqBbEupjaGmpE: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_mplqBbEupjaGmpE jl .L_small_initial_num_blocks_is_3_1_mplqBbEupjaGmpE cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_mplqBbEupjaGmpE cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_mplqBbEupjaGmpE jmp .L_small_initial_num_blocks_is_5_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_3_1_mplqBbEupjaGmpE: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_mplqBbEupjaGmpE cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_1_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_nsFdAskshxaeupv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 
vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nsFdAskshxaeupv .L_small_initial_partial_block_nsFdAskshxaeupv: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_nsFdAskshxaeupv .L_small_initial_compute_done_nsFdAskshxaeupv: .L_after_reduction_nsFdAskshxaeupv: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_2_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fCBepgtpwtinebu subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fCBepgtpwtinebu .L_small_initial_partial_block_fCBepgtpwtinebu: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 
vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fCBepgtpwtinebu: orq %r8,%r8 je .L_after_reduction_fCBepgtpwtinebu vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_fCBepgtpwtinebu: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_3_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ofgdrgACzgoBoBr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ofgdrgACzgoBoBr .L_small_initial_partial_block_ofgdrgACzgoBoBr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq 
$0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ofgdrgACzgoBoBr: orq %r8,%r8 je .L_after_reduction_ofgdrgACzgoBoBr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ofgdrgACzgoBoBr: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_4_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dEtigFagnjrsGpg subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dEtigFagnjrsGpg .L_small_initial_partial_block_dEtigFagnjrsGpg: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 
vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dEtigFagnjrsGpg: orq %r8,%r8 je .L_after_reduction_dEtigFagnjrsGpg vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dEtigFagnjrsGpg: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_5_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dCteGnCoiDfemGr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dCteGnCoiDfemGr .L_small_initial_partial_block_dCteGnCoiDfemGr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 
vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dCteGnCoiDfemGr: orq %r8,%r8 je .L_after_reduction_dCteGnCoiDfemGr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dCteGnCoiDfemGr: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_6_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bGkgeCcdmBAvnkd subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bGkgeCcdmBAvnkd .L_small_initial_partial_block_bGkgeCcdmBAvnkd: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bGkgeCcdmBAvnkd: orq %r8,%r8 je .L_after_reduction_bGkgeCcdmBAvnkd vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_bGkgeCcdmBAvnkd: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_7_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 
vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yFpypBfpEqGmDpc subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yFpypBfpEqGmDpc .L_small_initial_partial_block_yFpypBfpEqGmDpc: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yFpypBfpEqGmDpc: orq %r8,%r8 je .L_after_reduction_yFpypBfpEqGmDpc vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yFpypBfpEqGmDpc: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_8_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hjijhggGtBGkmFD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hjijhggGtBGkmFD .L_small_initial_partial_block_hjijhggGtBGkmFD: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hjijhggGtBGkmFD: orq %r8,%r8 je .L_after_reduction_hjijhggGtBGkmFD vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_hjijhggGtBGkmFD: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_9_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rEnEygbAhbwkuDv subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq 
%zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rEnEygbAhbwkuDv .L_small_initial_partial_block_rEnEygbAhbwkuDv: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rEnEygbAhbwkuDv: orq %r8,%r8 je .L_after_reduction_rEnEygbAhbwkuDv vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_rEnEygbAhbwkuDv: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_10_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ycofttvCgGxDvfA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ycofttvCgGxDvfA .L_small_initial_partial_block_ycofttvCgGxDvfA: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 
$1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ycofttvCgGxDvfA: orq %r8,%r8 je .L_after_reduction_ycofttvCgGxDvfA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ycofttvCgGxDvfA: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_11_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ltkvxnnCtyaDcot subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq 
$0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ltkvxnnCtyaDcot .L_small_initial_partial_block_ltkvxnnCtyaDcot: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ltkvxnnCtyaDcot: orq %r8,%r8 je .L_after_reduction_ltkvxnnCtyaDcot vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ltkvxnnCtyaDcot: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_12_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zoBxutsDfgEkfdl subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zoBxutsDfgEkfdl .L_small_initial_partial_block_zoBxutsDfgEkfdl: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq 
$0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zoBxutsDfgEkfdl: orq %r8,%r8 je .L_after_reduction_zoBxutsDfgEkfdl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_zoBxutsDfgEkfdl: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_13_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fgsEocrdhfxmzmp subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fgsEocrdhfxmzmp .L_small_initial_partial_block_fgsEocrdhfxmzmp: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fgsEocrdhfxmzmp: orq %r8,%r8 je .L_after_reduction_fgsEocrdhfxmzmp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_fgsEocrdhfxmzmp: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_14_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aBllprqbyydDmyj subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 
128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aBllprqbyydDmyj .L_small_initial_partial_block_aBllprqbyydDmyj: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aBllprqbyydDmyj: orq %r8,%r8 je .L_after_reduction_aBllprqbyydDmyj vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_aBllprqbyydDmyj: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_15_mplqBbEupjaGmpE: 
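/*
 * 15-block tail of the small-initial-blocks dispatch (GCM decrypt path,
 * 12-round key schedule at (%rdi)): derive 15 counter blocks from the saved
 * counter in %zmm2 via the ddq_add_* constants and a SHUF_MASK byte swap,
 * run them through AES to form the keystream, XOR it into the ciphertext at
 * (%rcx,%r11) (the final 64-byte chunk is loaded and stored under the %k1
 * byte mask taken from byte64_len_to_mask_table), write the plaintext to
 * (%r9,%r11), and fold the byte-reflected ciphertext into the GHASH
 * accumulator %xmm14 using the hash-key powers read from the context at
 * (%rsi).
 */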
vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AexewybgiAbCusw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq 
$0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AexewybgiAbCusw .L_small_initial_partial_block_AexewybgiAbCusw: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AexewybgiAbCusw: orq %r8,%r8 je .L_after_reduction_AexewybgiAbCusw vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AexewybgiAbCusw: jmp .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE .L_small_initial_num_blocks_is_16_mplqBbEupjaGmpE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 
64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_wjciopnfEgwwghE: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 
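/*
 * GHASH finish for this 16-block tail: %zmm0/%zmm3 hold the high/low halves
 * of the carry-less products and %zmm4 the combined middle terms.  Below,
 * the middle terms are split and folded into the high and low halves, the
 * four 128-bit lanes are XOR-reduced down to one, and the 256-bit result is
 * reduced modulo the GHASH polynomial with the POLY2 constant, leaving the
 * updated hash in %xmm14.
 */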
vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wjciopnfEgwwghE: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_wjciopnfEgwwghE: .L_small_initial_blocks_encrypted_mplqBbEupjaGmpE: .L_ghash_done_yiifChpfBbxhAhe: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_yiifChpfBbxhAhe: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_256_avx512: orq %r8,%r8 je .L_enc_dec_done_kgypzeldFqsBnqw xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_nggFpEjksmvdyrl movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_nggFpEjksmvdyrl subq %r13,%r12 .L_no_extra_mask_nggFpEjksmvdyrl: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_nggFpEjksmvdyrl vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_nggFpEjksmvdyrl .L_partial_incomplete_nggFpEjksmvdyrl: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_nggFpEjksmvdyrl: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_nggFpEjksmvdyrl: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_kgypzeldFqsBnqw cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_kgypzeldFqsBnqw vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_tAigrohrtcimtjt vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_tAigrohrtcimtjt .L_next_16_overflow_tAigrohrtcimtjt: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 
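/*
 * First 16 blocks of the AES-256 GCM decrypt main path: %zmm7/%zmm10/%zmm11/
 * %zmm12 now hold 16 big-endian counter blocks (the overflow branch above
 * byte-swaps, adds little-endian and swaps back when the low counter byte
 * would wrap).  They are encrypted with the 14-round key schedule at (%rdi)
 * and XORed with the ciphertext from (%rcx,%r11), the plaintext is stored to
 * (%r9,%r11), and the byte-reflected ciphertext is stashed on the stack
 * starting at 768(%rsp) so its GHASH can be stitched into later AES work.
 * On the first pass (gated by %r14) the hash-key powers are also copied from
 * the context into 512(%rsp)..767(%rsp).
 */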
.L_next_16_ok_tAigrohrtcimtjt: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_ghxCyjhEqsFobgk vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_ghxCyjhEqsFobgk: cmpq $512,%r8 jb .L_message_below_32_blocks_kgypzeldFqsBnqw cmpb $240,%r15b jae .L_next_16_overflow_ChqoygvwrfptFdk vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_ChqoygvwrfptFdk .L_next_16_overflow_ChqoygvwrfptFdk: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_ChqoygvwrfptFdk: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_mmnytfEfrGqjjzv vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 
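/*
 * One-time (per call, gated by %r14) extension of the hash-key power table:
 * the powers copied to 512(%rsp)..767(%rsp) are repeatedly multiplied by a
 * broadcast higher power of H, each product being reduced with the POLY2
 * constant as below, and the results fill 448(%rsp) down through 0(%rsp)
 * for use by the 48-blocks-per-iteration main loop.
 */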
vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 
%zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_mmnytfEfrGqjjzv: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_kgypzeldFqsBnqw .L_encrypt_big_nblocks_kgypzeldFqsBnqw: cmpb $240,%r15b jae .L_16_blocks_overflow_eCBAbsCxcdjldmp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_eCBAbsCxcdjldmp .L_16_blocks_overflow_eCBAbsCxcdjldmp: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_eCBAbsCxcdjldmp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq 
%zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_vakicEdockyEGlr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vakicEdockyEGlr .L_16_blocks_overflow_vakicEdockyEGlr: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vakicEdockyEGlr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_DpGlguFoEuofxlo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_DpGlguFoEuofxlo .L_16_blocks_overflow_DpGlguFoEuofxlo: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_DpGlguFoEuofxlo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 
1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_kgypzeldFqsBnqw .L_no_more_big_nblocks_kgypzeldFqsBnqw: cmpq $512,%r8 jae .L_encrypt_32_blocks_kgypzeldFqsBnqw cmpq $256,%r8 jae .L_encrypt_16_blocks_kgypzeldFqsBnqw .L_encrypt_0_blocks_ghash_32_kgypzeldFqsBnqw: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_ClvEnqtsgcyzxra cmpl $8,%r10d je .L_last_num_blocks_is_8_ClvEnqtsgcyzxra jb .L_last_num_blocks_is_7_1_ClvEnqtsgcyzxra cmpl $12,%r10d je .L_last_num_blocks_is_12_ClvEnqtsgcyzxra jb .L_last_num_blocks_is_11_9_ClvEnqtsgcyzxra cmpl $15,%r10d je .L_last_num_blocks_is_15_ClvEnqtsgcyzxra ja .L_last_num_blocks_is_16_ClvEnqtsgcyzxra cmpl $14,%r10d je .L_last_num_blocks_is_14_ClvEnqtsgcyzxra jmp .L_last_num_blocks_is_13_ClvEnqtsgcyzxra .L_last_num_blocks_is_11_9_ClvEnqtsgcyzxra: cmpl $10,%r10d je .L_last_num_blocks_is_10_ClvEnqtsgcyzxra ja .L_last_num_blocks_is_11_ClvEnqtsgcyzxra jmp .L_last_num_blocks_is_9_ClvEnqtsgcyzxra .L_last_num_blocks_is_7_1_ClvEnqtsgcyzxra: cmpl $4,%r10d je .L_last_num_blocks_is_4_ClvEnqtsgcyzxra jb .L_last_num_blocks_is_3_1_ClvEnqtsgcyzxra cmpl $6,%r10d ja .L_last_num_blocks_is_7_ClvEnqtsgcyzxra je .L_last_num_blocks_is_6_ClvEnqtsgcyzxra jmp .L_last_num_blocks_is_5_ClvEnqtsgcyzxra .L_last_num_blocks_is_3_1_ClvEnqtsgcyzxra: cmpl $2,%r10d ja .L_last_num_blocks_is_3_ClvEnqtsgcyzxra je .L_last_num_blocks_is_2_ClvEnqtsgcyzxra .L_last_num_blocks_is_1_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_kfstzqbddCmrAgf vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_kfstzqbddCmrAgf .L_16_blocks_overflow_kfstzqbddCmrAgf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_kfstzqbddCmrAgf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 
$0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_tzfDxgvlfbGFphv subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tzfDxgvlfbGFphv .L_small_initial_partial_block_tzfDxgvlfbGFphv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 
$1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_tzfDxgvlfbGFphv .L_small_initial_compute_done_tzfDxgvlfbGFphv: .L_after_reduction_tzfDxgvlfbGFphv: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_2_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_rEDkqlsspBphEcE vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_rEDkqlsspBphEcE .L_16_blocks_overflow_rEDkqlsspBphEcE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_rEDkqlsspBphEcE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ctfxgFaGttixvxc subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq 
$0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ctfxgFaGttixvxc .L_small_initial_partial_block_ctfxgFaGttixvxc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ctfxgFaGttixvxc: orq %r8,%r8 je .L_after_reduction_ctfxgFaGttixvxc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ctfxgFaGttixvxc: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_3_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_ghEEltEpFsCnyoi vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_ghEEltEpFsCnyoi .L_16_blocks_overflow_ghEEltEpFsCnyoi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_ghEEltEpFsCnyoi: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 
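/*
 * 3-block tail: while the single counter vector in %zmm0 works through the
 * AES rounds, the GHASH of the 16 ciphertext blocks saved from the previous
 * chunk is computed against the stack-resident hash-key powers indexed by
 * %rbx; the remaining ciphertext (at most 48 bytes) is then loaded under the
 * %k1 mask, decrypted, stored, and folded into the running hash.
 */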
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pdGCGzyrnusufbk subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pdGCGzyrnusufbk .L_small_initial_partial_block_pdGCGzyrnusufbk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pdGCGzyrnusufbk: orq %r8,%r8 je .L_after_reduction_pdGCGzyrnusufbk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pdGCGzyrnusufbk: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_4_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_vrGynyzBBkFtoug vpaddd %zmm28,%zmm2,%zmm0 jmp 
.L_16_blocks_ok_vrGynyzBBkFtoug .L_16_blocks_overflow_vrGynyzBBkFtoug: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_vrGynyzBBkFtoug: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vbpuzolxwysglov subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_vbpuzolxwysglov .L_small_initial_partial_block_vbpuzolxwysglov: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vbpuzolxwysglov: orq %r8,%r8 je .L_after_reduction_vbpuzolxwysglov vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vbpuzolxwysglov: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_5_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_kkiaoGfqlrecpbg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_kkiaoGfqlrecpbg .L_16_blocks_overflow_kkiaoGfqlrecpbg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_kkiaoGfqlrecpbg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 
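/* 5-block tail: four counter blocks run in %zmm0 and the fifth in %xmm3; the partial products of the buffered blocks are merged into %zmm24/%zmm25/%zmm26 below while the remaining AES rounds complete. */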
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ephjiBFojtbqzgd subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ephjiBFojtbqzgd .L_small_initial_partial_block_ephjiBFojtbqzgd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ephjiBFojtbqzgd: orq %r8,%r8 je .L_after_reduction_ephjiBFojtbqzgd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ephjiBFojtbqzgd: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_6_ClvEnqtsgcyzxra: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_BGjhpBrnvbegsga vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_BGjhpBrnvbegsga .L_16_blocks_overflow_BGjhpBrnvbegsga: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_BGjhpBrnvbegsga: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fcljjovquiEbomB subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 
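/* 6-block tail, full final block: all six blocks are multiplied against the cached hash keys at 256(%rsi) and 320(%rsi) before the polynomial reduction. */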
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fcljjovquiEbomB .L_small_initial_partial_block_fcljjovquiEbomB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fcljjovquiEbomB: orq %r8,%r8 je .L_after_reduction_fcljjovquiEbomB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fcljjovquiEbomB: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_7_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_izrwrwtizdFmmop vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_izrwrwtizdFmmop .L_16_blocks_overflow_izrwrwtizdFmmop: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_izrwrwtizdFmmop: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
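/* 7-block tail: two 512-bit counter vectors (%zmm0, %zmm3) are encrypted in lockstep with round keys broadcast from (%rdi); the second vector's load and store are masked by %k1 to cover the short final block. */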
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BGxuGiljxiGuGwj subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_BGxuGiljxiGuGwj .L_small_initial_partial_block_BGxuGiljxiGuGwj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BGxuGiljxiGuGwj: orq %r8,%r8 je .L_after_reduction_BGxuGiljxiGuGwj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BGxuGiljxiGuGwj: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_8_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_uokAwEtutqrxEoF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_uokAwEtutqrxEoF .L_16_blocks_overflow_uokAwEtutqrxEoF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_uokAwEtutqrxEoF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CannrFuxFceaxhk subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CannrFuxFceaxhk .L_small_initial_partial_block_CannrFuxFceaxhk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CannrFuxFceaxhk: orq %r8,%r8 je .L_after_reduction_CannrFuxFceaxhk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CannrFuxFceaxhk: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_9_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_ydCuzccyysxjEtE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_ydCuzccyysxjEtE .L_16_blocks_overflow_ydCuzccyysxjEtE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_ydCuzccyysxjEtE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 
176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hlxwfcoEeochjmF subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hlxwfcoEeochjmF .L_small_initial_partial_block_hlxwfcoEeochjmF: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 
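/* Tail of the POLY2 reduction: the remaining shifts and carry-less multiplies fold everything into the running GHASH value in %xmm14. */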
vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hlxwfcoEeochjmF: orq %r8,%r8 je .L_after_reduction_hlxwfcoEeochjmF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hlxwfcoEeochjmF: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_10_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_uhxcibFtDluhCCB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_uhxcibFtDluhCCB .L_16_blocks_overflow_uhxcibFtDluhCCB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_uhxcibFtDluhCCB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uwCCphGGeEaqtbf subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uwCCphGGeEaqtbf .L_small_initial_partial_block_uwCCphGGeEaqtbf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uwCCphGGeEaqtbf: orq %r8,%r8 je .L_after_reduction_uwCCphGGeEaqtbf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uwCCphGGeEaqtbf: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_11_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_ndAbfmoGyFeFtFs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ndAbfmoGyFeFtFs .L_16_blocks_overflow_ndAbfmoGyFeFtFs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ndAbfmoGyFeFtFs: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tojfqqaoGtkzuaq subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tojfqqaoGtkzuaq .L_small_initial_partial_block_tojfqqaoGtkzuaq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tojfqqaoGtkzuaq: orq %r8,%r8 je .L_after_reduction_tojfqqaoGtkzuaq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tojfqqaoGtkzuaq: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_12_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_rwelfyvzphiDsjE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_rwelfyvzphiDsjE .L_16_blocks_overflow_rwelfyvzphiDsjE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_rwelfyvzphiDsjE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CzrAuaBADCucxbj subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CzrAuaBADCucxbj .L_small_initial_partial_block_CzrAuaBADCucxbj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CzrAuaBADCucxbj: orq %r8,%r8 je .L_after_reduction_CzrAuaBADCucxbj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CzrAuaBADCucxbj: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_13_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_aizclGCjAeGBapi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_aizclGCjAeGBapi .L_16_blocks_overflow_aizclGCjAeGBapi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_aizclGCjAeGBapi: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 
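/* 13-block tail (continued): the remaining round keys, 128(%rdi) through 224(%rdi), finish encrypting the counter blocks; the keystream is XORed with the input already loaded from (%rcx,%r11,1) and the result is written to (%r9,%r11,1) before the tail GHASH update. */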
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rsvakfaFrrcdnmn subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rsvakfaFrrcdnmn .L_small_initial_partial_block_rsvakfaFrrcdnmn: movq %r8,(%rdx) vmovdqu64 
%xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rsvakfaFrrcdnmn: orq %r8,%r8 je .L_after_reduction_rsvakfaFrrcdnmn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rsvakfaFrrcdnmn: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_14_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_CifFuwhmDnsajva vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_CifFuwhmDnsajva .L_16_blocks_overflow_CifFuwhmDnsajva: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_CifFuwhmDnsajva: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eAqADtqcmpkizGe subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq 
$0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eAqADtqcmpkizGe .L_small_initial_partial_block_eAqADtqcmpkizGe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eAqADtqcmpkizGe: orq %r8,%r8 je .L_after_reduction_eAqADtqcmpkizGe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eAqADtqcmpkizGe: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_15_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_oiyvxmCxqthGqom vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_oiyvxmCxqthGqom .L_16_blocks_overflow_oiyvxmCxqthGqom: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_oiyvxmCxqthGqom: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
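/* 15-block tail: the AES rounds below are interleaved with VPCLMULQDQ multiplies of the buffered GHASH input at 1024(%rsp)..1216(%rsp) by the hash-key powers cached at (%rsp,%rbx,1); partial products accumulate in %zmm24, %zmm25 and %zmm26. */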
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 
%zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ugFbqvmchjEBBBz subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ugFbqvmchjEBBBz .L_small_initial_partial_block_ugFbqvmchjEBBBz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 
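/* Tail of the POLY2 reduction for the 15-block partial-block path; the reduced GHASH value is left in %xmm14. */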
vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ugFbqvmchjEBBBz: orq %r8,%r8 je .L_after_reduction_ugFbqvmchjEBBBz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ugFbqvmchjEBBBz: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_16_ClvEnqtsgcyzxra: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_wCdnfleczoFcEzf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wCdnfleczoFcEzf .L_16_blocks_overflow_wCdnfleczoFcEzf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wCdnfleczoFcEzf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 
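/* 16-block tail (continued): the remaining AES rounds run through the last round key at 224(%rdi), then the keystream is XORed with the four input block groups and stored. */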
vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_qkhBhqDFAyxsceq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qkhBhqDFAyxsceq: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qkhBhqDFAyxsceq: jmp .L_last_blocks_done_ClvEnqtsgcyzxra .L_last_num_blocks_is_0_ClvEnqtsgcyzxra: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 
64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_ClvEnqtsgcyzxra: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_kgypzeldFqsBnqw .L_encrypt_32_blocks_kgypzeldFqsBnqw: cmpb $240,%r15b jae .L_16_blocks_overflow_vGiehzfobkckAyi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vGiehzfobkckAyi .L_16_blocks_overflow_vGiehzfobkckAyi: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vGiehzfobkckAyi: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_aBfhhtmiojjovim vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_aBfhhtmiojjovim .L_16_blocks_overflow_aBfhhtmiojjovim: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_aBfhhtmiojjovim: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 
1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 
1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_AwFklinDrcbFgzn cmpl $8,%r10d je .L_last_num_blocks_is_8_AwFklinDrcbFgzn jb .L_last_num_blocks_is_7_1_AwFklinDrcbFgzn cmpl $12,%r10d je .L_last_num_blocks_is_12_AwFklinDrcbFgzn jb .L_last_num_blocks_is_11_9_AwFklinDrcbFgzn cmpl $15,%r10d je .L_last_num_blocks_is_15_AwFklinDrcbFgzn ja .L_last_num_blocks_is_16_AwFklinDrcbFgzn cmpl $14,%r10d je .L_last_num_blocks_is_14_AwFklinDrcbFgzn jmp .L_last_num_blocks_is_13_AwFklinDrcbFgzn .L_last_num_blocks_is_11_9_AwFklinDrcbFgzn: cmpl $10,%r10d je .L_last_num_blocks_is_10_AwFklinDrcbFgzn ja .L_last_num_blocks_is_11_AwFklinDrcbFgzn jmp .L_last_num_blocks_is_9_AwFklinDrcbFgzn .L_last_num_blocks_is_7_1_AwFklinDrcbFgzn: cmpl $4,%r10d je .L_last_num_blocks_is_4_AwFklinDrcbFgzn jb .L_last_num_blocks_is_3_1_AwFklinDrcbFgzn cmpl $6,%r10d ja .L_last_num_blocks_is_7_AwFklinDrcbFgzn je .L_last_num_blocks_is_6_AwFklinDrcbFgzn jmp .L_last_num_blocks_is_5_AwFklinDrcbFgzn .L_last_num_blocks_is_3_1_AwFklinDrcbFgzn: cmpl $2,%r10d ja .L_last_num_blocks_is_3_AwFklinDrcbFgzn je .L_last_num_blocks_is_2_AwFklinDrcbFgzn .L_last_num_blocks_is_1_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_FvFeevCgruEuomy vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_FvFeevCgruEuomy .L_16_blocks_overflow_FvFeevCgruEuomy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_FvFeevCgruEuomy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_vocdDxlyexcAqgk subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vocdDxlyexcAqgk .L_small_initial_partial_block_vocdDxlyexcAqgk: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_vocdDxlyexcAqgk .L_small_initial_compute_done_vocdDxlyexcAqgk: .L_after_reduction_vocdDxlyexcAqgk: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_2_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_rufCyEuzhyCcBum vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_rufCyEuzhyCcBum .L_16_blocks_overflow_rufCyEuzhyCcBum: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_rufCyEuzhyCcBum: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hFhwFAnywtirqFm subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hFhwFAnywtirqFm .L_small_initial_partial_block_hFhwFAnywtirqFm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hFhwFAnywtirqFm: orq %r8,%r8 je .L_after_reduction_hFhwFAnywtirqFm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hFhwFAnywtirqFm: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_3_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_oiFAsBBekBeEcll vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_oiFAsBBekBeEcll .L_16_blocks_overflow_oiFAsBBekBeEcll: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_oiFAsBBekBeEcll: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq 
%zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DakDxmbzhjsFccp subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DakDxmbzhjsFccp .L_small_initial_partial_block_DakDxmbzhjsFccp: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DakDxmbzhjsFccp: orq %r8,%r8 je .L_after_reduction_DakDxmbzhjsFccp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DakDxmbzhjsFccp: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_4_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_EeBjyjCzBemkiyn vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_EeBjyjCzBemkiyn .L_16_blocks_overflow_EeBjyjCzBemkiyn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_EeBjyjCzBemkiyn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 
832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pkDoGcykctqxwtv subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pkDoGcykctqxwtv .L_small_initial_partial_block_pkDoGcykctqxwtv: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 
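/* Annotation (descriptive only, not generated by the script): the vpsrldq/vpslldq
   pair above splits the accumulated GHASH partial products into a high half (%zmm0)
   and a low half (%zmm3); the code that follows folds 512 -> 256 -> 128 bits and
   reduces modulo the GCM polynomial via POLY2, leaving the updated hash accumulator
   in %xmm14. The same fold-and-reduce tail appears to be repeated verbatim in each
   "last_num_blocks" case below. */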
vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pkDoGcykctqxwtv: orq %r8,%r8 je .L_after_reduction_pkDoGcykctqxwtv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pkDoGcykctqxwtv: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_5_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_ygonEcumvGgxonp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_ygonEcumvGgxonp .L_16_blocks_overflow_ygonEcumvGgxonp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_ygonEcumvGgxonp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 
vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FBDnovehzAhxoFz subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FBDnovehzAhxoFz .L_small_initial_partial_block_FBDnovehzAhxoFz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FBDnovehzAhxoFz: orq %r8,%r8 je .L_after_reduction_FBDnovehzAhxoFz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FBDnovehzAhxoFz: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_6_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_zAwamddcsGuDbsw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_zAwamddcsGuDbsw .L_16_blocks_overflow_zAwamddcsGuDbsw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_zAwamddcsGuDbsw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 
768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nBiEFoifDnlnCnA subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nBiEFoifDnlnCnA .L_small_initial_partial_block_nBiEFoifDnlnCnA: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nBiEFoifDnlnCnA: orq %r8,%r8 je .L_after_reduction_nBiEFoifDnlnCnA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nBiEFoifDnlnCnA: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_7_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_pwBmqBGFfnBFiBx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_pwBmqBGFfnBFiBx .L_16_blocks_overflow_pwBmqBGFfnBFiBx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_pwBmqBGFfnBFiBx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 
80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wChogqeEderiszq subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wChogqeEderiszq .L_small_initial_partial_block_wChogqeEderiszq: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 
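/* Annotation (descriptive only): %zmm24, %zmm25 and %zmm26 seemingly hold the high,
   low and middle GHASH products of the blocks buffered on the stack, formed earlier
   against the key powers saved at 832/896/960(%rsp); the three-way XORs
   (vpternlogq $0x96) fold them into the products of the final blocks before the
   usual 128-bit fold and POLY2 reduction into %xmm14. */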
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wChogqeEderiszq: orq %r8,%r8 je .L_after_reduction_wChogqeEderiszq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wChogqeEderiszq: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_8_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_xgcteGoksvqdvwC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_xgcteGoksvqdvwC .L_16_blocks_overflow_xgcteGoksvqdvwC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_xgcteGoksvqdvwC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bwfvAfrqwqvnlGG subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bwfvAfrqwqvnlGG .L_small_initial_partial_block_bwfvAfrqwqvnlGG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bwfvAfrqwqvnlGG: orq %r8,%r8 je .L_after_reduction_bwfvAfrqwqvnlGG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bwfvAfrqwqvnlGG: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_9_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_nGFogvFjmdjnsvt 
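/* Annotation (descriptive only): nine-block tail. When the low counter byte cannot
   wrap (%r15d < 247) the nine counter blocks are derived directly with vpaddd and
   the increment constants kept in %zmm28/%zmm27/%xmm27; otherwise the overflow path
   byte-swaps the counter, adds ddq_add_1234/ddq_add_4444 and swaps back. */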
vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_nGFogvFjmdjnsvt .L_16_blocks_overflow_nGFogvFjmdjnsvt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_nGFogvFjmdjnsvt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) 
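/* Annotation (descriptive only): the first 64 bytes of keystream-XORed output were
   stored above; the next full 64-byte lane and the masked final lane (%k1) follow,
   after which the input blocks just consumed are byte-reflected with vpshufb %zmm29
   so they can be folded into the GHASH accumulator. */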
vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pkinwzuhxhaEgCa subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pkinwzuhxhaEgCa .L_small_initial_partial_block_pkinwzuhxhaEgCa: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pkinwzuhxhaEgCa: orq %r8,%r8 je .L_after_reduction_pkinwzuhxhaEgCa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pkinwzuhxhaEgCa: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_10_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_ryszgunyrqgvyfB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_ryszgunyrqgvyfB .L_16_blocks_overflow_ryszgunyrqgvyfB: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_ryszgunyrqgvyfB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb 
%ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jypDCauhjquEuyb subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jypDCauhjquEuyb .L_small_initial_partial_block_jypDCauhjquEuyb: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jypDCauhjquEuyb: orq %r8,%r8 je .L_after_reduction_jypDCauhjquEuyb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jypDCauhjquEuyb: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_11_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_DvudExkamyfuGdv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_DvudExkamyfuGdv 
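/* Annotation (descriptive only): counter-overflow variant of the eleven-block tail.
   The counter block is byte-swapped with vpshufb %zmm29 so the 32-bit adds carry
   correctly, bumped once by ddq_add_1234 and twice by ddq_add_4444 to form the
   three counter vectors, then swapped back to the GCM byte order. */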
.L_16_blocks_overflow_DvudExkamyfuGdv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_DvudExkamyfuGdv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb 
%zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dlfpdlkfExhwjDu subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dlfpdlkfExhwjDu .L_small_initial_partial_block_dlfpdlkfExhwjDu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dlfpdlkfExhwjDu: orq %r8,%r8 je .L_after_reduction_dlfpdlkfExhwjDu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dlfpdlkfExhwjDu: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_12_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_pycvwiovDfFylBw vpaddd 
%zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_pycvwiovDfFylBw .L_16_blocks_overflow_pycvwiovDfFylBw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_pycvwiovDfFylBw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DazlrGdgfFiEaoe subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DazlrGdgfFiEaoe .L_small_initial_partial_block_DazlrGdgfFiEaoe: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DazlrGdgfFiEaoe: orq %r8,%r8 je .L_after_reduction_DazlrGdgfFiEaoe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DazlrGdgfFiEaoe: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_13_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq 
(%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_sFwEGaAnGxDowcc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_sFwEGaAnGxDowcc .L_16_blocks_overflow_sFwEGaAnGxDowcc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_sFwEGaAnGxDowcc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tohyxsArdntzjGo subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tohyxsArdntzjGo .L_small_initial_partial_block_tohyxsArdntzjGo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq 
$8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tohyxsArdntzjGo: orq %r8,%r8 je .L_after_reduction_tohyxsArdntzjGo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tohyxsArdntzjGo: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_14_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_fapGrcjmuhklgzo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_fapGrcjmuhklgzo .L_16_blocks_overflow_fapGrcjmuhklgzo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_fapGrcjmuhklgzo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 
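/*
 * Hedged note: each vbroadcastf64x2 N(%rdi) below appears to broadcast one
 * 16-byte AES round key from the key schedule at %rdi into a full 512-bit
 * register, so a single vaesenc applies that round to four counter blocks
 * at once.  Two registers (%zmm30/%zmm31) alternate so the next key can be
 * broadcast while the current round executes; the narrower ymm/xmm forms
 * are used for the last register when the tail holds fewer than 16 blocks.
 */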
vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BeFutuwFnozaige subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BeFutuwFnozaige 
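/*
 * The .L_small_initial_partial_block_* path below is taken when the last
 * block of the tail is shorter than 16 bytes.  It appears to record the
 * pending byte count at (%rdx), stash the final (possibly partial) output
 * block (%xmm11) at 16(%rsi) so a later call can complete it, and run
 * GHASH over the complete blocks only, using hash-key powers shifted by
 * one table entry so the partial block can be folded in afterwards.
 */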
.L_small_initial_partial_block_BeFutuwFnozaige: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BeFutuwFnozaige: orq %r8,%r8 je .L_after_reduction_BeFutuwFnozaige vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BeFutuwFnozaige: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_15_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_aByDeEDFBCjvqGx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_aByDeEDFBCjvqGx .L_16_blocks_overflow_aByDeEDFBCjvqGx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_aByDeEDFBCjvqGx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
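/*
 * In this 15-block tail, as in the other .L_last_num_blocks_is_* paths,
 * the AES rounds for the new counter blocks are interleaved with
 * vpclmulqdq GHASH work on previously processed data blocks that were
 * saved byte-reflected on the stack; %rbx appears to select the slice of
 * precomputed hash-key powers at 0/64/128/192(%rsp,%rbx,1).  Interleaving
 * the two instruction streams hides the latency of both the AES and the
 * carry-less multiply units.
 */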
vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hAxtmivtdwAsvmz subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 
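/*
 * The hash-key powers used below appear to be the ones precomputed by
 * ossl_aes_gcm_init_avx512 in the context at %rsi: H^1 sits at 336(%rsi)
 * and higher powers at successively lower offsets (320, 288, 224, ...).
 * Each tail size loads only the slice it needs, e.g. the ymm load from
 * 304(%rsi) combined with the vinserti64x2 from 336(%rsi) just below.
 */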
vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hAxtmivtdwAsvmz .L_small_initial_partial_block_hAxtmivtdwAsvmz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hAxtmivtdwAsvmz: orq %r8,%r8 je .L_after_reduction_hAxtmivtdwAsvmz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hAxtmivtdwAsvmz: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_16_AwFklinDrcbFgzn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_BwrcaiuzmxchdBE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_BwrcaiuzmxchdBE .L_16_blocks_overflow_BwrcaiuzmxchdBE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_BwrcaiuzmxchdBE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 
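/*
 * After the final AES round above, %zmm0/%zmm3/%zmm4/%zmm5 hold the
 * keystream for up to 16 counter blocks.  The code below XORs it with the
 * source blocks loaded earlier (the last 64-byte chunk through mask %k1),
 * writes the result to the output at (%r9,%r11), and byte-reflects the
 * loaded source blocks (%zmm17/%zmm19/%zmm20/%zmm21) with vpshufb %zmm29
 * so they can enter the GHASH update.
 */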
vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_xniaaigktwmycDh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xniaaigktwmycDh: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xniaaigktwmycDh: jmp .L_last_blocks_done_AwFklinDrcbFgzn .L_last_num_blocks_is_0_AwFklinDrcbFgzn: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq 
$8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_AwFklinDrcbFgzn: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_kgypzeldFqsBnqw .L_encrypt_16_blocks_kgypzeldFqsBnqw: cmpb $240,%r15b jae .L_16_blocks_overflow_itlreegehzzFvho vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_itlreegehzzFvho .L_16_blocks_overflow_itlreegehzzFvho: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_itlreegehzzFvho: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 
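/*
 * The .L_encrypt_16_blocks_* step above handles a full 256-byte stride:
 * while the AES rounds below complete, the carry-less products of earlier
 * data blocks with their hash-key powers are accumulated into
 * %zmm24/%zmm25/%zmm26.  The freshly processed blocks are then
 * byte-reflected and parked at 1280..1472(%rsp) so their GHASH can be
 * folded in on the next pass, deferring the expensive reduction.
 */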
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_xAfbdFbjfoyBlDz cmpl $8,%r10d je .L_last_num_blocks_is_8_xAfbdFbjfoyBlDz jb .L_last_num_blocks_is_7_1_xAfbdFbjfoyBlDz cmpl $12,%r10d je .L_last_num_blocks_is_12_xAfbdFbjfoyBlDz jb .L_last_num_blocks_is_11_9_xAfbdFbjfoyBlDz cmpl $15,%r10d je .L_last_num_blocks_is_15_xAfbdFbjfoyBlDz ja .L_last_num_blocks_is_16_xAfbdFbjfoyBlDz cmpl $14,%r10d je .L_last_num_blocks_is_14_xAfbdFbjfoyBlDz jmp .L_last_num_blocks_is_13_xAfbdFbjfoyBlDz .L_last_num_blocks_is_11_9_xAfbdFbjfoyBlDz: cmpl $10,%r10d je .L_last_num_blocks_is_10_xAfbdFbjfoyBlDz ja .L_last_num_blocks_is_11_xAfbdFbjfoyBlDz jmp .L_last_num_blocks_is_9_xAfbdFbjfoyBlDz .L_last_num_blocks_is_7_1_xAfbdFbjfoyBlDz: cmpl $4,%r10d je .L_last_num_blocks_is_4_xAfbdFbjfoyBlDz jb .L_last_num_blocks_is_3_1_xAfbdFbjfoyBlDz cmpl $6,%r10d ja 
.L_last_num_blocks_is_7_xAfbdFbjfoyBlDz je .L_last_num_blocks_is_6_xAfbdFbjfoyBlDz jmp .L_last_num_blocks_is_5_xAfbdFbjfoyBlDz .L_last_num_blocks_is_3_1_xAfbdFbjfoyBlDz: cmpl $2,%r10d ja .L_last_num_blocks_is_3_xAfbdFbjfoyBlDz je .L_last_num_blocks_is_2_xAfbdFbjfoyBlDz .L_last_num_blocks_is_1_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_lapolqbccExufla vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_lapolqbccExufla .L_16_blocks_overflow_lapolqbccExufla: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_lapolqbccExufla: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb 
%xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_aksayyCEvBwkqCs subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aksayyCEvBwkqCs .L_small_initial_partial_block_aksayyCEvBwkqCs: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_aksayyCEvBwkqCs .L_small_initial_compute_done_aksayyCEvBwkqCs: .L_after_reduction_aksayyCEvBwkqCs: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_2_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_EnCCsEpwCxDywbA vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_EnCCsEpwCxDywbA .L_16_blocks_overflow_EnCCsEpwCxDywbA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_EnCCsEpwCxDywbA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 
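/*
 * The sequence below collapses the four 128-bit GHASH lanes into one: the
 * upper ymm/xmm halves are extracted and XORed down, and the resulting
 * 256-bit carry-less product is folded back to 128 bits modulo the GHASH
 * polynomial using the POLY2 constant (the vpclmulqdq/vpslldq/vpsrldq
 * steps ending in vpternlogq $0x96).  The reduced digest lands in %xmm14.
 */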
vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_enwlcwbgseiBryB subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_enwlcwbgseiBryB .L_small_initial_partial_block_enwlcwbgseiBryB: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_enwlcwbgseiBryB: orq %r8,%r8 je .L_after_reduction_enwlcwbgseiBryB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_enwlcwbgseiBryB: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_3_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_bEsbraEgeohwpzz vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_bEsbraEgeohwpzz .L_16_blocks_overflow_bEsbraEgeohwpzz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_bEsbraEgeohwpzz: 
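/*
 * Tail path for a 3-block remainder: a single masked zmm register of
 * counter blocks is run through the AES rounds while the GHASH of the 16
 * blocks parked at 1280..1472(%rsp) is completed against what appear to
 * be key powers kept at 512..704(%rsp); %xmm2 presumably carries the next
 * counter value forward for the caller.
 */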
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jrkEfawFjAdFFAw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jrkEfawFjAdFFAw .L_small_initial_partial_block_jrkEfawFjAdFFAw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jrkEfawFjAdFFAw: orq %r8,%r8 je .L_after_reduction_jrkEfawFjAdFFAw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jrkEfawFjAdFFAw: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_4_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_jxvxvtaszlAuveu vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_jxvxvtaszlAuveu .L_16_blocks_overflow_jxvxvtaszlAuveu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_jxvxvtaszlAuveu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 
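/*
 * vpternlogq with immediate 0x96 computes the three-way XOR A ^ B ^ C in a
 * single instruction; it is used throughout to merge the deferred GHASH
 * accumulators (%zmm24/%zmm25/%zmm26) into the freshly computed partial
 * products without extra vpxorq steps.
 */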
vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BoECtwduirkpGbd subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BoECtwduirkpGbd .L_small_initial_partial_block_BoECtwduirkpGbd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BoECtwduirkpGbd: orq %r8,%r8 je .L_after_reduction_BoECtwduirkpGbd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BoECtwduirkpGbd: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_5_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae 
.L_16_blocks_overflow_AemnsnzilvGaDvl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_AemnsnzilvGaDvl .L_16_blocks_overflow_AemnsnzilvGaDvl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_AemnsnzilvGaDvl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AChbnzckEtGqvia subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AChbnzckEtGqvia .L_small_initial_partial_block_AChbnzckEtGqvia: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AChbnzckEtGqvia: orq %r8,%r8 je .L_after_reduction_AChbnzckEtGqvia vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AChbnzckEtGqvia: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_6_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_pGnpmuquowsenAC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_pGnpmuquowsenAC .L_16_blocks_overflow_pGnpmuquowsenAC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_pGnpmuquowsenAC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 
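/*
 * Tail handling: one unrolled branch per possible count of residual blocks.
 * Each branch first derives the next counter blocks: the fast path adds the
 * increment constants already held in %zmm28/%zmm27 (valid while the low
 * counter byte cannot carry, per the cmpl against %r15d), while the overflow
 * path byte-reflects through %zmm29, adds ddq_add_1234/ddq_add_4444 and
 * reflects back.  The counters then run through the round keys broadcast
 * from the schedule at (%rdi); this stretch uses all 15 keys (0(%rdi) for
 * whitening up to vaesenclast with 224(%rdi)), i.e. the AES-256 path.
 */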
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kcatvpdGCtefzAw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kcatvpdGCtefzAw .L_small_initial_partial_block_kcatvpdGCtefzAw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kcatvpdGCtefzAw: orq %r8,%r8 je .L_after_reduction_kcatvpdGCtefzAw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kcatvpdGCtefzAw: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_7_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_vBcFztzloamdDFg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_vBcFztzloamdDFg .L_16_blocks_overflow_vBcFztzloamdDFg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_vBcFztzloamdDFg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yfFcsqkvhbddwyy subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yfFcsqkvhbddwyy .L_small_initial_partial_block_yfFcsqkvhbddwyy: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yfFcsqkvhbddwyy: orq %r8,%r8 je .L_after_reduction_yfFcsqkvhbddwyy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yfFcsqkvhbddwyy: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_8_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_FdAnkzzirEtjwrb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_FdAnkzzirEtjwrb .L_16_blocks_overflow_FdAnkzzirEtjwrb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_FdAnkzzirEtjwrb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
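/*
 * The remaining input is fetched with full 64-byte loads plus one
 * %k1-masked, zeroing load for the final partial vector; %k1 was built from
 * byte64_len_to_mask_table above, so only the valid tail bytes are touched.
 */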
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wvyqkgDlqezddls subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wvyqkgDlqezddls .L_small_initial_partial_block_wvyqkgDlqezddls: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 
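/*
 * GHASH of the consumed blocks: for each 512-bit group the four vpclmulqdq
 * forms (0x11 high*high, 0x00 low*low, 0x01/0x10 cross terms) are taken
 * against a precomputed power of the hash key loaded from the table kept in
 * the context at (%rsi).
 */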
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wvyqkgDlqezddls: orq %r8,%r8 je .L_after_reduction_wvyqkgDlqezddls vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wvyqkgDlqezddls: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_9_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_lhtDngmdlssnvDG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_lhtDngmdlssnvDG .L_16_blocks_overflow_lhtDngmdlssnvDG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_lhtDngmdlssnvDG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 
112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ptjDGBmufbAkAGG subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 
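/*
 * Reduction: the 256-bit product, split across a high and a low 128-bit
 * half, is folded back to 128 bits with the pre-shifted polynomial constant
 * POLY2: one vpclmulqdq/vpslldq step on the low qword, then two more
 * multiplies merged by a three-way XOR into %xmm14.
 */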
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ptjDGBmufbAkAGG .L_small_initial_partial_block_ptjDGBmufbAkAGG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ptjDGBmufbAkAGG: orq %r8,%r8 je .L_after_reduction_ptjDGBmufbAkAGG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ptjDGBmufbAkAGG: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_10_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_wsaFiGmrqxypimt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_wsaFiGmrqxypimt .L_16_blocks_overflow_wsaFiGmrqxypimt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_wsaFiGmrqxypimt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
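/*
 * vpternlogq with immediate 0x96 is a three-input XOR, so two partial GHASH
 * products can be folded into an accumulator in a single instruction.
 */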
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gnctxlhtglgbgvx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gnctxlhtglgbgvx .L_small_initial_partial_block_gnctxlhtglgbgvx: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gnctxlhtglgbgvx: orq %r8,%r8 je .L_after_reduction_gnctxlhtglgbgvx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gnctxlhtglgbgvx: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_11_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_neydhuxthowjDfe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_neydhuxthowjDfe .L_16_blocks_overflow_neydhuxthowjDfe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_neydhuxthowjDfe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
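/*
 * The vaesenc rounds on the fresh counter blocks are interleaved with the
 * vpclmulqdq/vpternlogq work on the previous blocks, so the AES and GHASH
 * dependency chains can execute in parallel.
 */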
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_btfsxwwBfubFEhw subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 
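/*
 * The starting offset into the hash-key power table appears to drop as the
 * residual block count grows (176(%rsi) here for 11 blocks, 160(%rsi) for
 * 12, and so on), pairing each block with the power of H that matches its
 * distance from the end of the data.
 */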
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_btfsxwwBfubFEhw .L_small_initial_partial_block_btfsxwwBfubFEhw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_btfsxwwBfubFEhw: orq %r8,%r8 je .L_after_reduction_btfsxwwBfubFEhw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_btfsxwwBfubFEhw: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_12_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_vmmvFmFAAqpDrjc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_vmmvFmFAAqpDrjc .L_16_blocks_overflow_vmmvFmFAAqpDrjc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb 
%zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_vmmvFmFAAqpDrjc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 
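/*
 * vaesenclast finishes the cipher; the keystream is XORed with the blocks
 * loaded earlier to produce the output, while the reduced GHASH value is
 * accumulated in %xmm14.
 */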
vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_muxxrlxFvpCuucj subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_muxxrlxFvpCuucj .L_small_initial_partial_block_muxxrlxFvpCuucj: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_muxxrlxFvpCuucj: orq %r8,%r8 je 
.L_after_reduction_muxxrlxFvpCuucj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_muxxrlxFvpCuucj: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_13_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_BtCEtGboibyzmkz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_BtCEtGboibyzmkz .L_16_blocks_overflow_BtCEtGboibyzmkz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_BtCEtGboibyzmkz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq 
%zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_niubrurEemqlCeh subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_niubrurEemqlCeh 
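/*
 * Two exits per branch: the full-block path above stores 0 through (%rdx),
 * while the partial-block path below records the residual byte count at
 * (%rdx) and stashes the last output block at 16(%rsi), presumably so the
 * GHASH of that incomplete block can be completed later.
 */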
.L_small_initial_partial_block_niubrurEemqlCeh: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_niubrurEemqlCeh: orq %r8,%r8 je .L_after_reduction_niubrurEemqlCeh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_niubrurEemqlCeh: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_14_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_mybAsEhdaxgnGrE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_mybAsEhdaxgnGrE .L_16_blocks_overflow_mybAsEhdaxgnGrE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_mybAsEhdaxgnGrE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qtDEunzdagagyyt subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 
vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qtDEunzdagagyyt .L_small_initial_partial_block_qtDEunzdagagyyt: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qtDEunzdagagyyt: orq %r8,%r8 je .L_after_reduction_qtDEunzdagagyyt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qtDEunzdagagyyt: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_15_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_Bofftlllstcnhmp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_Bofftlllstcnhmp .L_16_blocks_overflow_Bofftlllstcnhmp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_Bofftlllstcnhmp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 
176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ahcvvxeChlezaBm subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ahcvvxeChlezaBm .L_small_initial_partial_block_ahcvvxeChlezaBm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ahcvvxeChlezaBm: orq %r8,%r8 je .L_after_reduction_ahcvvxeChlezaBm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ahcvvxeChlezaBm: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_16_xAfbdFbjfoyBlDz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_nowrnsGGyachzjc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_nowrnsGGyachzjc .L_16_blocks_overflow_nowrnsGGyachzjc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_nowrnsGGyachzjc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 
vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_AoBCchcjotapvgu: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AoBCchcjotapvgu: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AoBCchcjotapvgu: jmp .L_last_blocks_done_xAfbdFbjfoyBlDz .L_last_num_blocks_is_0_xAfbdFbjfoyBlDz: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_xAfbdFbjfoyBlDz: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_kgypzeldFqsBnqw .L_message_below_32_blocks_kgypzeldFqsBnqw: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_qckdlimbBeqylyq vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 
vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_qckdlimbBeqylyq: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_qdswuDcxyhGmasp cmpl $8,%r10d je .L_last_num_blocks_is_8_qdswuDcxyhGmasp jb .L_last_num_blocks_is_7_1_qdswuDcxyhGmasp cmpl $12,%r10d je .L_last_num_blocks_is_12_qdswuDcxyhGmasp jb .L_last_num_blocks_is_11_9_qdswuDcxyhGmasp cmpl $15,%r10d je .L_last_num_blocks_is_15_qdswuDcxyhGmasp ja .L_last_num_blocks_is_16_qdswuDcxyhGmasp cmpl $14,%r10d je .L_last_num_blocks_is_14_qdswuDcxyhGmasp jmp .L_last_num_blocks_is_13_qdswuDcxyhGmasp .L_last_num_blocks_is_11_9_qdswuDcxyhGmasp: cmpl $10,%r10d je .L_last_num_blocks_is_10_qdswuDcxyhGmasp ja .L_last_num_blocks_is_11_qdswuDcxyhGmasp jmp .L_last_num_blocks_is_9_qdswuDcxyhGmasp .L_last_num_blocks_is_7_1_qdswuDcxyhGmasp: cmpl $4,%r10d je .L_last_num_blocks_is_4_qdswuDcxyhGmasp jb .L_last_num_blocks_is_3_1_qdswuDcxyhGmasp cmpl $6,%r10d ja .L_last_num_blocks_is_7_qdswuDcxyhGmasp je .L_last_num_blocks_is_6_qdswuDcxyhGmasp jmp .L_last_num_blocks_is_5_qdswuDcxyhGmasp .L_last_num_blocks_is_3_1_qdswuDcxyhGmasp: cmpl $2,%r10d ja .L_last_num_blocks_is_3_qdswuDcxyhGmasp je .L_last_num_blocks_is_2_qdswuDcxyhGmasp .L_last_num_blocks_is_1_qdswuDcxyhGmasp: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_AqvkjwfuBmvGzFo vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_AqvkjwfuBmvGzFo .L_16_blocks_overflow_AqvkjwfuBmvGzFo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_AqvkjwfuBmvGzFo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_zDugdiozxlCaAFc subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zDugdiozxlCaAFc .L_small_initial_partial_block_zDugdiozxlCaAFc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_zDugdiozxlCaAFc .L_small_initial_compute_done_zDugdiozxlCaAFc: .L_after_reduction_zDugdiozxlCaAFc: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_2_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_BFBqcyfExFAkGzj vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_BFBqcyfExFAkGzj .L_16_blocks_overflow_BFBqcyfExFAkGzj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_BFBqcyfExFAkGzj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 
vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bgisyxAEeEpkobG subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bgisyxAEeEpkobG .L_small_initial_partial_block_bgisyxAEeEpkobG: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bgisyxAEeEpkobG: orq %r8,%r8 je .L_after_reduction_bgisyxAEeEpkobG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bgisyxAEeEpkobG: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_3_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_yizvcDtiefGCDev vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_yizvcDtiefGCDev .L_16_blocks_overflow_yizvcDtiefGCDev: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_yizvcDtiefGCDev: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fegyzcDscsgdCgo subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fegyzcDscsgdCgo .L_small_initial_partial_block_fegyzcDscsgdCgo: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fegyzcDscsgdCgo: orq %r8,%r8 je 
.L_after_reduction_fegyzcDscsgdCgo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fegyzcDscsgdCgo: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_4_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_DGjzymFiusiuxvc vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_DGjzymFiusiuxvc .L_16_blocks_overflow_DGjzymFiusiuxvc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_DGjzymFiusiuxvc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DyGAAdrBpclAjrf subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DyGAAdrBpclAjrf .L_small_initial_partial_block_DyGAAdrBpclAjrf: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DyGAAdrBpclAjrf: orq %r8,%r8 je .L_after_reduction_DyGAAdrBpclAjrf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DyGAAdrBpclAjrf: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_5_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_qmnbjAabAnlrekx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_qmnbjAabAnlrekx .L_16_blocks_overflow_qmnbjAabAnlrekx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_qmnbjAabAnlrekx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qdgqavzegrGAAjz subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qdgqavzegrGAAjz .L_small_initial_partial_block_qdgqavzegrGAAjz: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
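/* After the reduction: if %r8 is non-zero a partial final block remains, so its
   byte-reflected contents (%xmm7) are XORed into the GHASH accumulator %xmm14
   without being multiplied here. */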
.L_small_initial_compute_done_qdgqavzegrGAAjz: orq %r8,%r8 je .L_after_reduction_qdgqavzegrGAAjz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qdgqavzegrGAAjz: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_6_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_AkAddilhnCabyyf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_AkAddilhnCabyyf .L_16_blocks_overflow_AkAddilhnCabyyf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_AkAddilhnCabyyf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 
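/* 6-block tail: subtract the five full 16-byte blocks from %r8; if fewer than 16 bytes
   remain the sixth block is partial, otherwise all six blocks are complete. */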
subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iibprCbqDlikAnd subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iibprCbqDlikAnd .L_small_initial_partial_block_iibprCbqDlikAnd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iibprCbqDlikAnd: orq %r8,%r8 je .L_after_reduction_iibprCbqDlikAnd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iibprCbqDlikAnd: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_7_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_lxvhGbsbefzGdxF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_lxvhGbsbefzGdxF .L_16_blocks_overflow_lxvhGbsbefzGdxF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_lxvhGbsbefzGdxF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GthoECEdfcnGsvc subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GthoECEdfcnGsvc .L_small_initial_partial_block_GthoECEdfcnGsvc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GthoECEdfcnGsvc: orq %r8,%r8 je .L_after_reduction_GthoECEdfcnGsvc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GthoECEdfcnGsvc: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_8_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_qwiyktwmAFnlrAv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_qwiyktwmAFnlrAv .L_16_blocks_overflow_qwiyktwmAFnlrAv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_qwiyktwmAFnlrAv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 
64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hBGcauuiubbhsmg subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hBGcauuiubbhsmg .L_small_initial_partial_block_hBGcauuiubbhsmg: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hBGcauuiubbhsmg: orq %r8,%r8 je .L_after_reduction_hBGcauuiubbhsmg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hBGcauuiubbhsmg: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_9_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_Aahazrycncacmjd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_Aahazrycncacmjd .L_16_blocks_overflow_Aahazrycncacmjd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_Aahazrycncacmjd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xijDGphAfrrjvcn subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xijDGphAfrrjvcn .L_small_initial_partial_block_xijDGphAfrrjvcn: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xijDGphAfrrjvcn: orq %r8,%r8 je .L_after_reduction_xijDGphAfrrjvcn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xijDGphAfrrjvcn: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_10_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_hkbadvpbxvroayG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_hkbadvpbxvroayG .L_16_blocks_overflow_hkbadvpbxvroayG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_hkbadvpbxvroayG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oahmBbxzjdosefa subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oahmBbxzjdosefa .L_small_initial_partial_block_oahmBbxzjdosefa: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oahmBbxzjdosefa: orq %r8,%r8 je .L_after_reduction_oahmBbxzjdosefa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oahmBbxzjdosefa: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_11_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_FsdwrjvehsptDBd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_FsdwrjvehsptDBd .L_16_blocks_overflow_FsdwrjvehsptDBd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_FsdwrjvehsptDBd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 
176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yodgBeqbEhheCDd subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yodgBeqbEhheCDd .L_small_initial_partial_block_yodgBeqbEhheCDd: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yodgBeqbEhheCDd: orq %r8,%r8 je .L_after_reduction_yodgBeqbEhheCDd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yodgBeqbEhheCDd: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_12_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_thkeiGylBuuojur vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_thkeiGylBuuojur .L_16_blocks_overflow_thkeiGylBuuojur: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_thkeiGylBuuojur: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yzbzfadAzvvaytc subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yzbzfadAzvvaytc .L_small_initial_partial_block_yzbzfadAzvvaytc: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
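/*
 * The sequence that follows appears to be the common tail reduction: the
 * 512-bit product accumulators are folded lane by lane down to 128 bits
 * (vextracti64x4 / vextracti32x4 plus XORs) and then reduced modulo the
 * GHASH polynomial using the POLY2 constant and carry-less multiplies,
 * with the reduced hash value left in %xmm14.
 */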
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yzbzfadAzvvaytc: orq %r8,%r8 je .L_after_reduction_yzbzfadAzvvaytc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yzbzfadAzvvaytc: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_13_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_eFxvoygBEBGohmA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_eFxvoygBEBGohmA .L_16_blocks_overflow_eFxvoygBEBGohmA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_eFxvoygBEBGohmA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zzewAuyevyjoCwC subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zzewAuyevyjoCwC .L_small_initial_partial_block_zzewAuyevyjoCwC: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) 
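/*
 * Partial-block bookkeeping for this 13-block case: the leftover byte count
 * was just stored through %rdx and the final, partially-filled output block
 * (%xmm11) stashed at 16(%rsi), apparently so a later call can complete it;
 * the multiplies below then hash only the twelve complete blocks against
 * the precomputed hash-key powers at 160/224/288(%rsi).
 */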
vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zzewAuyevyjoCwC: orq %r8,%r8 je .L_after_reduction_zzewAuyevyjoCwC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zzewAuyevyjoCwC: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_14_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_wcubmfDtExvnDlb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_wcubmfDtExvnDlb .L_16_blocks_overflow_wcubmfDtExvnDlb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_wcubmfDtExvnDlb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wbcvGrEDxndwxqw subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 
vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wbcvGrEDxndwxqw .L_small_initial_partial_block_wbcvGrEDxndwxqw: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wbcvGrEDxndwxqw: orq %r8,%r8 je .L_after_reduction_wbcvGrEDxndwxqw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wbcvGrEDxndwxqw: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_15_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_hDvByfpahyymzEv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hDvByfpahyymzEv .L_16_blocks_overflow_hDvByfpahyymzEv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hDvByfpahyymzEv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb 
%zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uAckhsjfbEBxdkE subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uAckhsjfbEBxdkE .L_small_initial_partial_block_uAckhsjfbEBxdkE: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uAckhsjfbEBxdkE: orq %r8,%r8 je .L_after_reduction_uAckhsjfbEBxdkE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uAckhsjfbEBxdkE: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_16_qdswuDcxyhGmasp: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_rnhelBbtegFkzjj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_rnhelBbtegFkzjj .L_16_blocks_overflow_rnhelBbtegFkzjj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_rnhelBbtegFkzjj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_wEgqnyhjgyEjfkm: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wEgqnyhjgyEjfkm: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wEgqnyhjgyEjfkm: jmp .L_last_blocks_done_qdswuDcxyhGmasp .L_last_num_blocks_is_0_qdswuDcxyhGmasp: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq 
$0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_qdswuDcxyhGmasp: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_kgypzeldFqsBnqw .L_message_below_equal_16_blocks_kgypzeldFqsBnqw: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_uBFzjxzanCsxGGe jl .L_small_initial_num_blocks_is_7_1_uBFzjxzanCsxGGe cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_uBFzjxzanCsxGGe jl .L_small_initial_num_blocks_is_11_9_uBFzjxzanCsxGGe cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_uBFzjxzanCsxGGe cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_uBFzjxzanCsxGGe cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_uBFzjxzanCsxGGe jmp .L_small_initial_num_blocks_is_13_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_11_9_uBFzjxzanCsxGGe: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_uBFzjxzanCsxGGe cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_uBFzjxzanCsxGGe jmp .L_small_initial_num_blocks_is_9_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_7_1_uBFzjxzanCsxGGe: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_uBFzjxzanCsxGGe jl .L_small_initial_num_blocks_is_3_1_uBFzjxzanCsxGGe cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_uBFzjxzanCsxGGe cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_uBFzjxzanCsxGGe jmp .L_small_initial_num_blocks_is_5_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_3_1_uBFzjxzanCsxGGe: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_uBFzjxzanCsxGGe cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_1_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 
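/*
 * Note (descriptive annotation, hedged): single-block tail path - the
 * remaining AES rounds use the round keys at 112(%rdi)..224(%rdi), after which
 * the encrypted counter block is XORed with the masked-loaded input block and
 * stored through mask %k1 before the GHASH update.
 */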
vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_usvkeoywsioAnfD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_usvkeoywsioAnfD .L_small_initial_partial_block_usvkeoywsioAnfD: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_usvkeoywsioAnfD .L_small_initial_compute_done_usvkeoywsioAnfD: .L_after_reduction_usvkeoywsioAnfD: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_2_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yvjeqFrhsrkxcss subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq 
$0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yvjeqFrhsrkxcss .L_small_initial_partial_block_yvjeqFrhsrkxcss: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yvjeqFrhsrkxcss: orq %r8,%r8 je .L_after_reduction_yvjeqFrhsrkxcss vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yvjeqFrhsrkxcss: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_3_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mvdynCrzwGwegAr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq 
$0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mvdynCrzwGwegAr .L_small_initial_partial_block_mvdynCrzwGwegAr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mvdynCrzwGwegAr: orq %r8,%r8 je .L_after_reduction_mvdynCrzwGwegAr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mvdynCrzwGwegAr: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_4_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pjDzAfyivuABgdr subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq 
$0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pjDzAfyivuABgdr .L_small_initial_partial_block_pjDzAfyivuABgdr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pjDzAfyivuABgdr: orq %r8,%r8 je .L_after_reduction_pjDzAfyivuABgdr vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_pjDzAfyivuABgdr: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_5_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 
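/*
 * Note (descriptive annotation, hedged): final AES round for the 5-block tail;
 * the keystream is XORed with the four full blocks in %zmm6 and the partial
 * block in %xmm7, the result is stored (the tail through mask %k1), and the
 * data blocks are byte-reflected with SHUF_MASK before the GHASH update.
 */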
vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fcBludqftzBwbAa subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fcBludqftzBwbAa .L_small_initial_partial_block_fcBludqftzBwbAa: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fcBludqftzBwbAa: orq %r8,%r8 je .L_after_reduction_fcBludqftzBwbAa vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_fcBludqftzBwbAa: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_6_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 
64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gpklsvBmbaGumBx subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gpklsvBmbaGumBx .L_small_initial_partial_block_gpklsvBmbaGumBx: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gpklsvBmbaGumBx: orq %r8,%r8 je .L_after_reduction_gpklsvBmbaGumBx vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_gpklsvBmbaGumBx: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_7_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fFxDDorEtzfbsCi subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 
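/*
 * Note (descriptive annotation, hedged): the vpternlogq below completes the
 * reduction into %xmm14 for the 7-block tail; the alternative
 * .L_small_initial_partial_block_* path records the remaining length at
 * (%rdx), saves the last block state at 16(%rsi), and performs the GHASH with
 * hash-key powers shifted by one table entry (256(%rsi)/320(%rsi)).
 */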
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fFxDDorEtzfbsCi .L_small_initial_partial_block_fFxDDorEtzfbsCi: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fFxDDorEtzfbsCi: orq %r8,%r8 je .L_after_reduction_fFxDDorEtzfbsCi vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_fFxDDorEtzfbsCi: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_8_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_mhgromrjcFpqAxA subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mhgromrjcFpqAxA .L_small_initial_partial_block_mhgromrjcFpqAxA: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mhgromrjcFpqAxA: orq %r8,%r8 je .L_after_reduction_mhgromrjcFpqAxA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mhgromrjcFpqAxA: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_9_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 
48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eghzedifwilpnEF subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eghzedifwilpnEF .L_small_initial_partial_block_eghzedifwilpnEF: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 
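/*
 * Note (descriptive annotation, hedged): GHASH for the 9-block tail continues
 * with the next group of hash-key powers loaded from the table at (%rsi); the
 * per-group products are XOR-combined before the final fold and POLY2
 * reduction into %xmm14.
 */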
vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eghzedifwilpnEF: orq %r8,%r8 je .L_after_reduction_eghzedifwilpnEF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_eghzedifwilpnEF: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_10_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) 
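/*
 * Note (descriptive annotation, hedged): 10-block tail - masked store of the
 * final partial 64-byte chunk, byte-reflection of the data blocks with
 * SHUF_MASK, then a branch on the remaining length in %r8 between the
 * full-last-block and partial-last-block GHASH variants.
 */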
vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aBEqcFFmwBplgFE subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aBEqcFFmwBplgFE .L_small_initial_partial_block_aBEqcFFmwBplgFE: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aBEqcFFmwBplgFE: orq %r8,%r8 je .L_after_reduction_aBEqcFFmwBplgFE vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_aBEqcFFmwBplgFE: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_11_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ozteDdAwrbobDia subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 
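# Horizontal XOR-fold of the 512-bit GHASH partial products down to 128 bits,
# followed by reduction modulo the GCM polynomial using the POLY2 constant.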
vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ozteDdAwrbobDia .L_small_initial_partial_block_ozteDdAwrbobDia: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ozteDdAwrbobDia: orq %r8,%r8 je .L_after_reduction_ozteDdAwrbobDia vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ozteDdAwrbobDia: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_12_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 
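# AES rounds continue with the round keys at 112(%rdi)..224(%rdi); fifteen
# round keys ending in vaesenclast, i.e. the AES-256 schedule on this path.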
vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xaldGCCAFmcudnD subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xaldGCCAFmcudnD .L_small_initial_partial_block_xaldGCCAFmcudnD: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq 
%zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xaldGCCAFmcudnD: orq %r8,%r8 je .L_after_reduction_xaldGCCAFmcudnD vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xaldGCCAFmcudnD: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_13_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast 
%zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_txhExvepwglFbiC subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_txhExvepwglFbiC .L_small_initial_partial_block_txhExvepwglFbiC: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_txhExvepwglFbiC: orq %r8,%r8 je .L_after_reduction_txhExvepwglFbiC vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_txhExvepwglFbiC: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_14_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb 
%ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_usDayEFvfwmlydb subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_usDayEFvfwmlydb .L_small_initial_partial_block_usDayEFvfwmlydb: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_usDayEFvfwmlydb: orq %r8,%r8 je 
.L_after_reduction_usDayEFvfwmlydb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_usDayEFvfwmlydb: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe .L_small_initial_num_blocks_is_15_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DrCACnmarBwymye subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq 
$0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DrCACnmarBwymye .L_small_initial_partial_block_DrCACnmarBwymye: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DrCACnmarBwymye: orq %r8,%r8 je .L_after_reduction_DrCACnmarBwymye vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_DrCACnmarBwymye: jmp .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe 
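# Handler for an initial count of 16 blocks: four 4-block counter groups are
# CTR-processed with the full key schedule; the last 64-byte chunk is loaded
# and stored through mask %k1 so a partial final block is handled correctly.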
.L_small_initial_num_blocks_is_16_uBFzjxzanCsxGGe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_khwfpcqckgAmFnr: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq 
$0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_khwfpcqckgAmFnr: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_khwfpcqckgAmFnr: .L_small_initial_blocks_encrypted_uBFzjxzanCsxGGe: .L_ghash_done_kgypzeldFqsBnqw: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_kgypzeldFqsBnqw: jmp .Lexit_gcm_decrypt .Lexit_gcm_decrypt: cmpq $256,%r8 jbe .Lskip_hkeys_cleanup_cdrboBdzwmggbeq vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_cdrboBdzwmggbeq: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .byte 0xf3,0xc3 .Ldecrypt_seh_end: .cfi_endproc .size ossl_aes_gcm_decrypt_avx512, .-ossl_aes_gcm_decrypt_avx512 .globl ossl_aes_gcm_finalize_avx512 .type ossl_aes_gcm_finalize_avx512,@function .align 32 ossl_aes_gcm_finalize_avx512: .cfi_startproc .byte 243,15,30,250 vmovdqu 336(%rdi),%xmm2 vmovdqu 32(%rdi),%xmm3 vmovdqu 64(%rdi),%xmm4 cmpq $0,%rsi je .L_partial_done_sAyBcyeiDCmpxul vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0 vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16 vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17 vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4 vpxorq %xmm17,%xmm4,%xmm4 vpsrldq $8,%xmm4,%xmm17 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm17,%xmm0,%xmm0 vpxorq %xmm16,%xmm4,%xmm4 vmovdqu64 POLY2(%rip),%xmm17 vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16 vpslldq $8,%xmm16,%xmm16 vpxorq %xmm16,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16 vpsrldq $4,%xmm16,%xmm16 vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4 vpslldq $4,%xmm4,%xmm4 vpternlogq $0x96,%xmm16,%xmm0,%xmm4 .L_partial_done_sAyBcyeiDCmpxul: vmovq 56(%rdi),%xmm5 vpinsrq $1,48(%rdi),%xmm5,%xmm5 vpsllq $3,%xmm5,%xmm5 
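# Fold the (AAD || text) bit lengths into the GHASH state, do a final GHASH
# multiply/reduce, byte-swap the result and XOR it with the encrypted initial
# counter block from 32(%rdi); the resulting tag is written back to 64(%rdi).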
vpxor %xmm5,%xmm4,%xmm4 vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0 vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16 vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17 vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4 vpxorq %xmm17,%xmm4,%xmm4 vpsrldq $8,%xmm4,%xmm17 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm17,%xmm0,%xmm0 vpxorq %xmm16,%xmm4,%xmm4 vmovdqu64 POLY2(%rip),%xmm17 vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16 vpslldq $8,%xmm16,%xmm16 vpxorq %xmm16,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16 vpsrldq $4,%xmm16,%xmm16 vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4 vpslldq $4,%xmm4,%xmm4 vpternlogq $0x96,%xmm16,%xmm0,%xmm4 vpshufb SHUF_MASK(%rip),%xmm4,%xmm4 vpxor %xmm4,%xmm3,%xmm3 .L_return_T_sAyBcyeiDCmpxul: vmovdqu %xmm3,64(%rdi) .Labort_finalize: .byte 0xf3,0xc3 .cfi_endproc .size ossl_aes_gcm_finalize_avx512, .-ossl_aes_gcm_finalize_avx512 .globl ossl_gcm_gmult_avx512 .hidden ossl_gcm_gmult_avx512 .type ossl_gcm_gmult_avx512,@function .align 32 ossl_gcm_gmult_avx512: .cfi_startproc .byte 243,15,30,250 vmovdqu64 (%rdi),%xmm1 vmovdqu64 336(%rsi),%xmm2 vpclmulqdq $0x11,%xmm2,%xmm1,%xmm3 vpclmulqdq $0x00,%xmm2,%xmm1,%xmm4 vpclmulqdq $0x01,%xmm2,%xmm1,%xmm5 vpclmulqdq $0x10,%xmm2,%xmm1,%xmm1 vpxorq %xmm5,%xmm1,%xmm1 vpsrldq $8,%xmm1,%xmm5 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm5,%xmm3,%xmm3 vpxorq %xmm4,%xmm1,%xmm1 vmovdqu64 POLY2(%rip),%xmm5 vpclmulqdq $0x01,%xmm1,%xmm5,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm1,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm5,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm5,%xmm1 vpslldq $4,%xmm1,%xmm1 vpternlogq $0x96,%xmm4,%xmm3,%xmm1 vmovdqu64 %xmm1,(%rdi) vzeroupper .Labort_gmult: .byte 0xf3,0xc3 .cfi_endproc .size ossl_gcm_gmult_avx512, .-ossl_gcm_gmult_avx512 .data .align 16 POLY:.quad 0x0000000000000001, 0xC200000000000000 .align 64 POLY2: .quad 0x00000001C2000000, 0xC200000000000000 .quad 0x00000001C2000000, 0xC200000000000000 .quad 0x00000001C2000000, 0xC200000000000000 .quad 0x00000001C2000000, 0xC200000000000000 .align 16 TWOONE:.quad 0x0000000000000001, 0x0000000100000000 .align 64 SHUF_MASK: .quad 0x08090A0B0C0D0E0F, 0x0001020304050607 .quad 0x08090A0B0C0D0E0F, 0x0001020304050607 .quad 0x08090A0B0C0D0E0F, 0x0001020304050607 .quad 0x08090A0B0C0D0E0F, 0x0001020304050607 .align 16 SHIFT_MASK: .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 ALL_F: .quad 0xffffffffffffffff, 0xffffffffffffffff ZERO: .quad 0x0000000000000000, 0x0000000000000000 .align 16 ONE: .quad 0x0000000000000001, 0x0000000000000000 .align 16 ONEf: .quad 0x0000000000000000, 0x0100000000000000 .align 64 ddq_add_1234: .quad 0x0000000000000001, 0x0000000000000000 .quad 0x0000000000000002, 0x0000000000000000 .quad 0x0000000000000003, 0x0000000000000000 .quad 0x0000000000000004, 0x0000000000000000 .align 64 ddq_add_5678: .quad 0x0000000000000005, 0x0000000000000000 .quad 0x0000000000000006, 0x0000000000000000 .quad 0x0000000000000007, 0x0000000000000000 .quad 0x0000000000000008, 0x0000000000000000 .align 64 ddq_add_4444: .quad 0x0000000000000004, 0x0000000000000000 .quad 0x0000000000000004, 0x0000000000000000 .quad 0x0000000000000004, 0x0000000000000000 .quad 0x0000000000000004, 0x0000000000000000 .align 64 ddq_add_8888: .quad 0x0000000000000008, 0x0000000000000000 .quad 0x0000000000000008, 0x0000000000000000 .quad 0x0000000000000008, 0x0000000000000000 .quad 0x0000000000000008, 0x0000000000000000 .align 64 ddq_addbe_1234: .quad 0x0000000000000000, 0x0100000000000000 .quad 0x0000000000000000, 0x0200000000000000 .quad 0x0000000000000000, 0x0300000000000000 .quad 0x0000000000000000, 0x0400000000000000 .align 64 ddq_addbe_4444: .quad 0x0000000000000000, 
0x0400000000000000 .quad 0x0000000000000000, 0x0400000000000000 .quad 0x0000000000000000, 0x0400000000000000 .quad 0x0000000000000000, 0x0400000000000000 .align 64 byte_len_to_mask_table: .value 0x0000, 0x0001, 0x0003, 0x0007 .value 0x000f, 0x001f, 0x003f, 0x007f .value 0x00ff, 0x01ff, 0x03ff, 0x07ff .value 0x0fff, 0x1fff, 0x3fff, 0x7fff .value 0xffff .align 64 byte64_len_to_mask_table: .quad 0x0000000000000000, 0x0000000000000001 .quad 0x0000000000000003, 0x0000000000000007 .quad 0x000000000000000f, 0x000000000000001f .quad 0x000000000000003f, 0x000000000000007f .quad 0x00000000000000ff, 0x00000000000001ff .quad 0x00000000000003ff, 0x00000000000007ff .quad 0x0000000000000fff, 0x0000000000001fff .quad 0x0000000000003fff, 0x0000000000007fff .quad 0x000000000000ffff, 0x000000000001ffff .quad 0x000000000003ffff, 0x000000000007ffff .quad 0x00000000000fffff, 0x00000000001fffff .quad 0x00000000003fffff, 0x00000000007fffff .quad 0x0000000000ffffff, 0x0000000001ffffff .quad 0x0000000003ffffff, 0x0000000007ffffff .quad 0x000000000fffffff, 0x000000001fffffff .quad 0x000000003fffffff, 0x000000007fffffff .quad 0x00000000ffffffff, 0x00000001ffffffff .quad 0x00000003ffffffff, 0x00000007ffffffff .quad 0x0000000fffffffff, 0x0000001fffffffff .quad 0x0000003fffffffff, 0x0000007fffffffff .quad 0x000000ffffffffff, 0x000001ffffffffff .quad 0x000003ffffffffff, 0x000007ffffffffff .quad 0x00000fffffffffff, 0x00001fffffffffff .quad 0x00003fffffffffff, 0x00007fffffffffff .quad 0x0000ffffffffffff, 0x0001ffffffffffff .quad 0x0003ffffffffffff, 0x0007ffffffffffff .quad 0x000fffffffffffff, 0x001fffffffffffff .quad 0x003fffffffffffff, 0x007fffffffffffff .quad 0x00ffffffffffffff, 0x01ffffffffffffff .quad 0x03ffffffffffffff, 0x07ffffffffffffff .quad 0x0fffffffffffffff, 0x1fffffffffffffff .quad 0x3fffffffffffffff, 0x7fffffffffffffff .quad 0xffffffffffffffff .section ".note.gnu.property", "a" .p2align 3 .long 1f - 0f .long 4f - 1f .long 5 0: # "GNU" encoded with .byte, since .asciz isn't supported # on Solaris. .byte 0x47 .byte 0x4e .byte 0x55 .byte 0 1: .p2align 3 .long 0xc0000002 .long 3f - 2f 2: .long 3 3: .p2align 3 4: