Files under /linux/arch/x86/crypto/ that define and use the RB1 register macro:
sm4-aesni-avx-asm_64.S
    37   #define RB1 %xmm13        (macro definition)
    181  vmovdqa .Lpost_tf_lo_s rRIP, RB1;
    197  transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
    268  vpshufb RTMP2, RB1, RB1;
    274  transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
    333  ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
    334  ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
    335  ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
    336  ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
    346  transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
    [all …]
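All of these .S files are run through the C preprocessor before assembly, so #define serves two purposes in them: it gives SIMD registers readable names (here RB1 is just an alias for %xmm13) and it builds reusable instruction sequences such as transpose_4x4, which the SM4 code uses to reshuffle four 128-bit blocks so that each register ends up holding one 32-bit word from each block. Below is a minimal, self-contained sketch of that pattern; the alias names, the demo symbol, and the exact transpose sequence are illustrative assumptions, not an excerpt from the file.

    /* sketch.S: illustrative only. The assembler never sees the RA and
     * RTMP aliases; cpp rewrites them to the %xmm registers below. */
    #define RA0    %xmm0
    #define RA1    %xmm1
    #define RA2    %xmm2
    #define RA3    %xmm3
    #define RTMP0  %xmm4
    #define RTMP1  %xmm5

    /* 4x4 transpose of 32-bit words spread across four XMM registers
     * (AT&T operand order: source, source, destination). */
    #define transpose_4x4(x0, x1, x2, x3, t1, t2) \
            vpunpckhdq x1, x0, t2;                \
            vpunpckldq x1, x0, x0;                \
            vpunpckldq x3, x2, t1;                \
            vpunpckhdq x3, x2, x2;                \
            vpunpckhqdq t1, x0, x1;               \
            vpunpcklqdq t1, x0, x0;               \
            vpunpckhqdq x2, t2, x3;               \
            vpunpcklqdq x2, t2, x2;

            .text
            .globl  transpose_demo
    transpose_demo:
            transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1)
            ret

Keeping the transpose as a preprocessor macro rather than a subroutine lets the same sequence be stamped out for both the RA and RB register groups with no call overhead, which is why the hits above show it applied to RB0 through RB3 directly.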
sm4-aesni-avx2-asm_64.S
    38   #define RB1 %ymm13        (macro definition)
    174  vpshufb RTMP2, RB1, RB1;
    180  transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
    245  ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
    246  ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
    247  ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
    248  ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
    258  transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
    264  vpshufb RTMP2, RB1, RB1;
    323  vpshufb RTMP3, RTMP0, RB1;
    [all …]
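The ROUND(0, RA0, RA1, RA2, RA3, ...) through ROUND(3, RA3, RA0, RA1, RA2, ...) sequence visible in both SM4 listings is the usual argument-rotation trick: each invocation updates whichever register is passed in the first state slot, so four calls with shifted argument lists walk through all four state words without any register-to-register moves between rounds. The toy sketch below shows only that calling pattern; the round body (a plain XOR with a hypothetical round-key array at %rdi) is a stand-in, since the real SM4 round also passes this value through the AES-NI based S-box and a linear rotation layer before mixing it in.

    /* toy_rounds.S: illustrates rotated macro arguments, not SM4 itself. */
    #define RK     %rdi    /* hypothetical pointer to 16-byte round keys */
    #define RA0    %xmm0
    #define RA1    %xmm1
    #define RA2    %xmm2
    #define RA3    %xmm3
    #define RTMP0  %xmm4

    /* Toy round: s0 ^= s1 ^ s2 ^ s3 ^ rk[round]. */
    #define ROUND(round, s0, s1, s2, s3)       \
            vmovdqu (round * 16)(RK), RTMP0;   \
            vpxor s1, RTMP0, RTMP0;            \
            vpxor s2, RTMP0, RTMP0;            \
            vpxor s3, RTMP0, RTMP0;            \
            vpxor RTMP0, s0, s0;

            .text
            .globl  four_rounds_demo
    four_rounds_demo:
            ROUND(0, RA0, RA1, RA2, RA3)
            ROUND(1, RA1, RA2, RA3, RA0)
            ROUND(2, RA2, RA3, RA0, RA1)
            ROUND(3, RA3, RA0, RA1, RA2)
            ret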
cast6-avx-x86_64-asm_64.S
    38   #define RB1 %xmm1         (macro definition)
    269  inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
    293  outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
    317  inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
    340  outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
    358  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    362  store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    381  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    385  store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    406  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    [all …]
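load_8way and store_8way, which also appear in the twofish and serpent AVX listings below, are the I/O side of the same approach: eight 16-byte blocks are moved between memory and eight XMM registers in one go so the cipher rounds can process all of them in parallel. The sketch below shows what such helpers typically look like; the macro bodies and the register assignments are assumptions in the spirit of the kernel's AVX glue helpers, not quoted from them.

    /* eight_way.S: illustrative sketch of 8-way block load/store helpers. */
    #define RA1    %xmm0
    #define RB1    %xmm1
    #define RC1    %xmm2
    #define RD1    %xmm3
    #define RA2    %xmm4
    #define RB2    %xmm5
    #define RC2    %xmm6
    #define RD2    %xmm7

    /* src/dst are general-purpose registers holding the buffer address. */
    #define load_8way(src, x0, x1, x2, x3, y0, y1, y2, y3) \
            vmovdqu (0*16)(src), x0;   \
            vmovdqu (1*16)(src), x1;   \
            vmovdqu (2*16)(src), x2;   \
            vmovdqu (3*16)(src), x3;   \
            vmovdqu (4*16)(src), y0;   \
            vmovdqu (5*16)(src), y1;   \
            vmovdqu (6*16)(src), y2;   \
            vmovdqu (7*16)(src), y3;

    #define store_8way(dst, x0, x1, x2, x3, y0, y1, y2, y3) \
            vmovdqu x0, (0*16)(dst);   \
            vmovdqu x1, (1*16)(dst);   \
            vmovdqu x2, (2*16)(dst);   \
            vmovdqu x3, (3*16)(dst);   \
            vmovdqu y0, (4*16)(dst);   \
            vmovdqu y1, (5*16)(dst);   \
            vmovdqu y2, (6*16)(dst);   \
            vmovdqu y3, (7*16)(dst);

            .text
            .globl  copy_8_blocks_demo
    copy_8_blocks_demo:
            /* Mirrors the load/rounds/store shape of the listings above,
             * minus the cipher body: 8 blocks in from %rdx, out to %rsi. */
            load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2)
            store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2)
            ret

The AVX2 serpent listing further down follows the same shape widened to %ymm registers, with load_16way/store_16way handling sixteen blocks per pass.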
twofish-avx-x86_64-asm_64.S
    38   #define RB1 %xmm1         (macro definition)
    245  inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
    266  outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
    285  inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
    305  outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
    321  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    325  store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
    341  load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
    345  store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    364  load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
    [all …]
serpent-avx-x86_64-asm_64.S
    30   #define RB1 %xmm1         (macro definition)
    563  read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    600  write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    616  read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    653  write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
    667  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    671  store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    685  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    689  store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
    703  load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    [all …]
serpent-avx2-asm_64.S
    32   #define RB1 %ymm4         (macro definition)
    563  read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    600  write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    616  read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    653  write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
    669  load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    673  store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    691  load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    695  store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
    713  load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
    [all …]
serpent-sse2-x86_64-asm_64.S
    23   #define RB1 %xmm1         (macro definition)
    633  read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    675  write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    681  xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    697  read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
    735  write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);