/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/*
 * Register roles (AT&T operand order: source first, destination last).
 *
 * NUM_BLKS starts out as the block count and is turned into an end-of-data
 * pointer at function entry; DATA_PTR is advanced by 64 per block.
 */
#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Points at K256 + 32 words; see the displacement note in do_4rounds. */
#define SHA256CONSTANTS	%rax

#define MSG		%xmm0	/* sha256rnds2 implicit operand */
#define STATE0		%xmm1
#define STATE1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define TMP		%xmm7

#define SHUF_MASK	%xmm8

#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds - emit the code for SHA-256 rounds \i .. \i+3.
 *
 * \i			index of the first of the four rounds (0, 4, ..., 60)
 * \m0			register holding (or, for \i < 16, receiving) the four
 *			message schedule words consumed by these rounds
 * \m1, \m2, \m3	the other three message registers, in rotation order
 *
 * For \i < 16 the schedule words come straight from the input block, so they
 * are loaded (unaligned) from DATA_PTR and byte-swapped with SHUF_MASK.
 *
 * SHA256CONSTANTS is biased by 32 words, hence the (\i-32)*4 displacement:
 * over \i = 0..60 it spans -128..112, so each one is encodable as a signed
 * 8-bit displacement.
 *
 * Each sha256rnds2 performs two rounds using the low two dwords of MSG
 * (%xmm0, implicit); punpckhqdq moves the upper two dwords down for the
 * second pair.  The two sha256rnds2 alternate destination registers, so
 * after the macro STATE0/STATE1 are back in their ABEF/CDGH roles.
 *
 * The palignr/paddd/sha256msg2 and sha256msg1 steps advance the message
 * schedule held in \m1 and \m3 for later rounds; the .if ranges switch them
 * off where no further (or no prior) schedule words are needed.
 *
 * Clobbers: MSG, TMP, \m0..\m3 as described, STATE0, STATE1.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	punpckhqdq	MSG, MSG
	sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 block function
 *
 * This function takes a pointer to the current SHA-256 state, a pointer to the
 * input data, and the number of 64-byte blocks to process.  Once all blocks
 * have been processed, the state is updated with the new state.  This function
 * only processes complete blocks.  State initialization, buffering of partial
 * blocks, and digest finalization are expected to be handled elsewhere.
 *
 * If nblocks is 0 the function returns immediately and leaves the state
 * untouched.
 *
 * In:    %rdi = state, %rsi = data, %rdx = nblocks
 * Clobb: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 * The state and data are accessed with movdqu, so neither needs to be
 * 16-byte aligned.
 *
 * void sha256_ni_transform(struct sha256_block_state *state,
 *			    const u8 *data, size_t nblocks);
 */
.text
SYM_FUNC_START(sha256_ni_transform)

	shl		$6, NUM_BLKS		/* convert to bytes */
	/*
	 * shl set ZF from its result.  Without this check a zero block count
	 * would make the end pointer equal DATA_PTR, and the do-while style
	 * loop below would never see DATA_PTR == end again after add $64.
	 */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256+32*4(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 64 rounds, fully unrolled: 4 groups of 16, 4 rounds per macro call */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(STATE_PTR)
	movdqu		STATE0, 1*16(STATE_PTR)

.Ldone_hash:
	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * SHA-256 round constants K[0..63].  64-byte aligned, so the movdqa loads in
 * do_4rounds (16-byte steps from the table base) are always aligned.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/* pshufb control that reverses the byte order within each 32-bit word. */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203