/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/* Syntax: GNU as, AT&T operand order (source first, destination last). */

#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Points 32 entries into K256; see the lea in sha256_ni_transform. */
#define SHA256CONSTANTS	%rax

#define MSG		%xmm0	/* sha256rnds2 implicit operand */
#define STATE0		%xmm1
#define STATE1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define TMP		%xmm7

#define SHUF_MASK	%xmm8

#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds i, m0, m1, m2, m3
 *
 * Emit SHA-256 rounds i .. i+3 of the current block.
 *
 *   i        index of the first round in this group (0, 4, ..., 60)
 *   m0..m3   the four message registers, rotated by one position on each
 *            successive invocation; m0 supplies the message words that are
 *            consumed by this group of rounds
 *
 * For i < 16 the message words are loaded from DATA_PTR + 4*i and byte
 * swapped within each 32-bit word using SHUF_MASK.  For later groups m0 is
 * expected to already hold schedule words produced by earlier invocations.
 *
 * The round constants are addressed relative to SHA256CONSTANTS, which
 * points at K256 + 32*4.  The resulting displacements span -128 .. +112,
 * presumably so that every one of them fits a signed 8-bit displacement.
 *
 * Clobbers MSG and TMP; updates STATE0, STATE1, m0 (when loaded), m1 and m3.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	sha256rnds2	STATE0, STATE1		/* first two rounds of the group */
.if \i >= 12 && \i < 60
	/* Finish the schedule words that a later group will consume. */
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	punpckhqdq	MSG, MSG		/* bring upper two W+K sums down */
	sha256rnds2	STATE1, STATE0		/* last two rounds of the group */
.if \i >= 4 && \i < 52
	/* Start the schedule computation for a later group. */
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 block function
 *
 * This function takes a pointer to the current SHA-256 state, a pointer to the
 * input data, and the number of 64-byte blocks to process. Once all blocks
 * have been processed, the state is updated with the new state. This function
 * only processes complete blocks. State initialization, buffering of partial
 * blocks, and digest finalization is expected to be handled elsewhere.
 *
 * void sha256_ni_transform(struct sha256_block_state *state,
 *			    const u8 *data, size_t nblocks);
 *
 * In:       %rdi = state, %rsi = data, %rdx = nblocks
 * Out:      the 32 bytes at state are overwritten with the updated hash
 *           values; no return value
 * Clobbers: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 *
 * If nblocks is zero the function returns immediately and leaves the state
 * untouched.  Without that check the end pointer would equal the start
 * pointer and the equality test at the bottom of the loop could not
 * terminate it before reading far past the input buffer.
 */
.text
SYM_FUNC_START(sha256_ni_transform)

	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* zero blocks: nothing to hash */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256+32*4(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 4 iterations x 4 invocations x 4 rounds = 64 rounds per block */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(STATE_PTR)
	movdqu		STATE0, 1*16(STATE_PTR)

.Ldone_hash:
	RET
SYM_FUNC_END(sha256_ni_transform)

/* The 64 SHA-256 round constants, four per row in round order. */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/* pshufb control that reverses the byte order inside each 32-bit word. */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203