/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

/*
 * Syntax: GNU as, AT&T operand order (source, destination), run through
 * the C preprocessor.  Arguments arrive in %rdi/%rsi/%rdx.
 */
#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */
#define NUM_BLKS_32	%edx	/* 3rd arg as passed: a 32-bit count */

#define SHA256CONSTANTS	%rax

#define MSG		%xmm0	/* sha256rnds2 implicit operand */
#define STATE0		%xmm1
#define STATE1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define TMP		%xmm7

#define SHUF_MASK	%xmm8

#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds - run four consecutive SHA-256 rounds starting at round \i
 *
 *   \i           index of the first of the four rounds (0, 4, ..., 60)
 *   \m0          message register for these rounds; for \i < 16 it is
 *                loaded here from the input block and byte-swapped
 *   \m1          receives the next four schedule words when 12 <= \i < 60
 *   \m2          not referenced by the macro body; it only keeps the
 *                four-register rotation at the call sites uniform
 *   \m3          supplies the palignr input and is the destination of the
 *                sha256msg1 step when 4 <= \i < 52
 *
 * Uses DATA_PTR, SHA256CONSTANTS and SHUF_MASK as inputs, updates
 * STATE0/STATE1, and clobbers MSG and TMP.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	/* First 16 words come straight from the input block */
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	/*
	 * SHA256CONSTANTS points 32 entries into K256, hence the (\i-32)
	 * bias: the displacement stays between -128 and +112 for every
	 * round index used.
	 */
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	sha256rnds2	STATE0, STATE1		/* rounds \i, \i+1 */
.if \i >= 12 && \i < 60
	/* Finish the next four message-schedule words in \m1 */
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	/* Bring the upper two K+W sums down into the low quadword of MSG */
	punpckhqdq	MSG, MSG
	sha256rnds2	STATE1, STATE0		/* rounds \i+2, \i+3 */
.if \i >= 4 && \i < 52
	/* Start the schedule computation that a later group completes */
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the digest pointer is updated with the resulting hash value.
 * The function only processes complete blocks, there is no functionality to
 * store partial blocks.  All message padding and hash value initialization must
 * be done outside the update function.
 *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 * digest : pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
 *
 * In:    %rdi = digest (32 bytes, read and written with unaligned moves)
 *        %rsi = data   (read with unaligned moves)
 *        %edx = numBlocks; zero blocks returns without touching the digest
 * Out:   nothing in registers; *digest holds the updated hash values
 * Clobb: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 * Stack: none used
 */

.text
SYM_TYPED_FUNC_START(sha256_ni_transform)

	/*
	 * numBlocks is a 32-bit argument, so bits 63:32 of %rdx are not
	 * defined by the calling convention.  Zero-extend before doing any
	 * 64-bit arithmetic on it; otherwise stale upper bits would corrupt
	 * the end pointer and could defeat the zero-block check below.
	 */
	movl		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0	/* DCBA */
	movdqu		1*16(DIGEST_PTR), STATE1	/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	/* Biased base pointer; see the displacement note in do_4rounds */
	lea		K256+32*4(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 4 x 16 = 64 rounds, rotating the four message registers */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(DIGEST_PTR)
	movdqu		STATE0, 1*16(DIGEST_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * K256: table of sixty-four 32-bit SHA-256 round constants, stored in round
 * order, four per row (one row per 16-byte load).  Kept 64-byte aligned so
 * that aligned 16-byte loads at any multiple-of-16 offset are valid.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.balign		64
K256:
	.long	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	/* rounds  0- 3 */
	.long	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	/* rounds  4- 7 */
	.long	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	/* rounds  8-11 */
	.long	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	/* rounds 12-15 */
	.long	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	/* rounds 16-19 */
	.long	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	/* rounds 20-23 */
	.long	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	/* rounds 24-27 */
	.long	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	/* rounds 28-31 */
	.long	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	/* rounds 32-35 */
	.long	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	/* rounds 36-39 */
	.long	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	/* rounds 40-43 */
	.long	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	/* rounds 44-47 */
	.long	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	/* rounds 48-51 */
	.long	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	/* rounds 52-55 */
	.long	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	/* rounds 56-59 */
	.long	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	/* rounds 60-63 */

/*
 * PSHUFFLE_BYTE_FLIP_MASK: pshufb control that reverses the byte order
 * inside each 32-bit word of a 16-byte vector.  Written as four little-endian
 * words; the 16 bytes in memory are 03 02 01 00 07 06 05 04 0b 0a 09 08
 * 0f 0e 0d 0c.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.balign		16
PSHUFFLE_BYTE_FLIP_MASK:
	.long	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f