/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53*f3d6cb3dSEric Biggers * 54*f3d6cb3dSEric Biggers */ 55*f3d6cb3dSEric Biggers 56*f3d6cb3dSEric Biggers#include <linux/linkage.h> 57*f3d6cb3dSEric Biggers 58*f3d6cb3dSEric Biggers#define DIGEST_PTR %rdi /* 1st arg */ 59*f3d6cb3dSEric Biggers#define DATA_PTR %rsi /* 2nd arg */ 60*f3d6cb3dSEric Biggers#define NUM_BLKS %rdx /* 3rd arg */ 61*f3d6cb3dSEric Biggers 62*f3d6cb3dSEric Biggers/* gcc conversion */ 63*f3d6cb3dSEric Biggers#define FRAME_SIZE 32 /* space for 2x16 bytes */ 64*f3d6cb3dSEric Biggers 65*f3d6cb3dSEric Biggers#define ABCD %xmm0 66*f3d6cb3dSEric Biggers#define E0 %xmm1 /* Need two E's b/c they ping pong */ 67*f3d6cb3dSEric Biggers#define E1 %xmm2 68*f3d6cb3dSEric Biggers#define MSG0 %xmm3 69*f3d6cb3dSEric Biggers#define MSG1 %xmm4 70*f3d6cb3dSEric Biggers#define MSG2 %xmm5 71*f3d6cb3dSEric Biggers#define MSG3 %xmm6 72*f3d6cb3dSEric Biggers#define SHUF_MASK %xmm7 73*f3d6cb3dSEric Biggers 74*f3d6cb3dSEric Biggers 75*f3d6cb3dSEric Biggers/* 76*f3d6cb3dSEric Biggers * Intel SHA Extensions optimized implementation of a SHA-1 block function 77*f3d6cb3dSEric Biggers * 78*f3d6cb3dSEric Biggers * This function takes a pointer to the current SHA-1 state, a pointer to the 79*f3d6cb3dSEric Biggers * input data, and the number of 64-byte blocks to process. Once all blocks 80*f3d6cb3dSEric Biggers * have been processed, the state is updated with the new state. This function 81*f3d6cb3dSEric Biggers * only processes complete blocks. State initialization, buffering of partial 82*f3d6cb3dSEric Biggers * blocks, and digest finalization are expected to be handled elsewhere. 83*f3d6cb3dSEric Biggers * 84*f3d6cb3dSEric Biggers * The indented lines in the loop are instructions related to rounds processing. 85*f3d6cb3dSEric Biggers * The non-indented lines are instructions related to the message schedule. 
86*f3d6cb3dSEric Biggers * 87*f3d6cb3dSEric Biggers * void sha1_ni_transform(struct sha1_block_state *state, 88*f3d6cb3dSEric Biggers * const u8 *data, size_t nblocks) 89*f3d6cb3dSEric Biggers */ 90*f3d6cb3dSEric Biggers.text 91*f3d6cb3dSEric BiggersSYM_FUNC_START(sha1_ni_transform) 92*f3d6cb3dSEric Biggers push %rbp 93*f3d6cb3dSEric Biggers mov %rsp, %rbp 94*f3d6cb3dSEric Biggers sub $FRAME_SIZE, %rsp 95*f3d6cb3dSEric Biggers and $~0xF, %rsp 96*f3d6cb3dSEric Biggers 97*f3d6cb3dSEric Biggers shl $6, NUM_BLKS /* convert to bytes */ 98*f3d6cb3dSEric Biggers jz .Ldone_hash 99*f3d6cb3dSEric Biggers add DATA_PTR, NUM_BLKS /* pointer to end of data */ 100*f3d6cb3dSEric Biggers 101*f3d6cb3dSEric Biggers /* load initial hash values */ 102*f3d6cb3dSEric Biggers pinsrd $3, 1*16(DIGEST_PTR), E0 103*f3d6cb3dSEric Biggers movdqu 0*16(DIGEST_PTR), ABCD 104*f3d6cb3dSEric Biggers pand UPPER_WORD_MASK(%rip), E0 105*f3d6cb3dSEric Biggers pshufd $0x1B, ABCD, ABCD 106*f3d6cb3dSEric Biggers 107*f3d6cb3dSEric Biggers movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK 108*f3d6cb3dSEric Biggers 109*f3d6cb3dSEric Biggers.Lloop0: 110*f3d6cb3dSEric Biggers /* Save hash values for addition after rounds */ 111*f3d6cb3dSEric Biggers movdqa E0, (0*16)(%rsp) 112*f3d6cb3dSEric Biggers movdqa ABCD, (1*16)(%rsp) 113*f3d6cb3dSEric Biggers 114*f3d6cb3dSEric Biggers /* Rounds 0-3 */ 115*f3d6cb3dSEric Biggers movdqu 0*16(DATA_PTR), MSG0 116*f3d6cb3dSEric Biggers pshufb SHUF_MASK, MSG0 117*f3d6cb3dSEric Biggers paddd MSG0, E0 118*f3d6cb3dSEric Biggers movdqa ABCD, E1 119*f3d6cb3dSEric Biggers sha1rnds4 $0, E0, ABCD 120*f3d6cb3dSEric Biggers 121*f3d6cb3dSEric Biggers /* Rounds 4-7 */ 122*f3d6cb3dSEric Biggers movdqu 1*16(DATA_PTR), MSG1 123*f3d6cb3dSEric Biggers pshufb SHUF_MASK, MSG1 124*f3d6cb3dSEric Biggers sha1nexte MSG1, E1 125*f3d6cb3dSEric Biggers movdqa ABCD, E0 126*f3d6cb3dSEric Biggers sha1rnds4 $0, E1, ABCD 127*f3d6cb3dSEric Biggers sha1msg1 MSG1, MSG0 128*f3d6cb3dSEric Biggers 129*f3d6cb3dSEric 
Biggers /* Rounds 8-11 */ 130*f3d6cb3dSEric Biggers movdqu 2*16(DATA_PTR), MSG2 131*f3d6cb3dSEric Biggers pshufb SHUF_MASK, MSG2 132*f3d6cb3dSEric Biggers sha1nexte MSG2, E0 133*f3d6cb3dSEric Biggers movdqa ABCD, E1 134*f3d6cb3dSEric Biggers sha1rnds4 $0, E0, ABCD 135*f3d6cb3dSEric Biggers sha1msg1 MSG2, MSG1 136*f3d6cb3dSEric Biggers pxor MSG2, MSG0 137*f3d6cb3dSEric Biggers 138*f3d6cb3dSEric Biggers /* Rounds 12-15 */ 139*f3d6cb3dSEric Biggers movdqu 3*16(DATA_PTR), MSG3 140*f3d6cb3dSEric Biggers pshufb SHUF_MASK, MSG3 141*f3d6cb3dSEric Biggers sha1nexte MSG3, E1 142*f3d6cb3dSEric Biggers movdqa ABCD, E0 143*f3d6cb3dSEric Biggers sha1msg2 MSG3, MSG0 144*f3d6cb3dSEric Biggers sha1rnds4 $0, E1, ABCD 145*f3d6cb3dSEric Biggers sha1msg1 MSG3, MSG2 146*f3d6cb3dSEric Biggers pxor MSG3, MSG1 147*f3d6cb3dSEric Biggers 148*f3d6cb3dSEric Biggers /* Rounds 16-19 */ 149*f3d6cb3dSEric Biggers sha1nexte MSG0, E0 150*f3d6cb3dSEric Biggers movdqa ABCD, E1 151*f3d6cb3dSEric Biggers sha1msg2 MSG0, MSG1 152*f3d6cb3dSEric Biggers sha1rnds4 $0, E0, ABCD 153*f3d6cb3dSEric Biggers sha1msg1 MSG0, MSG3 154*f3d6cb3dSEric Biggers pxor MSG0, MSG2 155*f3d6cb3dSEric Biggers 156*f3d6cb3dSEric Biggers /* Rounds 20-23 */ 157*f3d6cb3dSEric Biggers sha1nexte MSG1, E1 158*f3d6cb3dSEric Biggers movdqa ABCD, E0 159*f3d6cb3dSEric Biggers sha1msg2 MSG1, MSG2 160*f3d6cb3dSEric Biggers sha1rnds4 $1, E1, ABCD 161*f3d6cb3dSEric Biggers sha1msg1 MSG1, MSG0 162*f3d6cb3dSEric Biggers pxor MSG1, MSG3 163*f3d6cb3dSEric Biggers 164*f3d6cb3dSEric Biggers /* Rounds 24-27 */ 165*f3d6cb3dSEric Biggers sha1nexte MSG2, E0 166*f3d6cb3dSEric Biggers movdqa ABCD, E1 167*f3d6cb3dSEric Biggers sha1msg2 MSG2, MSG3 168*f3d6cb3dSEric Biggers sha1rnds4 $1, E0, ABCD 169*f3d6cb3dSEric Biggers sha1msg1 MSG2, MSG1 170*f3d6cb3dSEric Biggers pxor MSG2, MSG0 171*f3d6cb3dSEric Biggers 172*f3d6cb3dSEric Biggers /* Rounds 28-31 */ 173*f3d6cb3dSEric Biggers sha1nexte MSG3, E1 174*f3d6cb3dSEric Biggers movdqa ABCD, E0 175*f3d6cb3dSEric 
Biggers sha1msg2 MSG3, MSG0 176*f3d6cb3dSEric Biggers sha1rnds4 $1, E1, ABCD 177*f3d6cb3dSEric Biggers sha1msg1 MSG3, MSG2 178*f3d6cb3dSEric Biggers pxor MSG3, MSG1 179*f3d6cb3dSEric Biggers 180*f3d6cb3dSEric Biggers /* Rounds 32-35 */ 181*f3d6cb3dSEric Biggers sha1nexte MSG0, E0 182*f3d6cb3dSEric Biggers movdqa ABCD, E1 183*f3d6cb3dSEric Biggers sha1msg2 MSG0, MSG1 184*f3d6cb3dSEric Biggers sha1rnds4 $1, E0, ABCD 185*f3d6cb3dSEric Biggers sha1msg1 MSG0, MSG3 186*f3d6cb3dSEric Biggers pxor MSG0, MSG2 187*f3d6cb3dSEric Biggers 188*f3d6cb3dSEric Biggers /* Rounds 36-39 */ 189*f3d6cb3dSEric Biggers sha1nexte MSG1, E1 190*f3d6cb3dSEric Biggers movdqa ABCD, E0 191*f3d6cb3dSEric Biggers sha1msg2 MSG1, MSG2 192*f3d6cb3dSEric Biggers sha1rnds4 $1, E1, ABCD 193*f3d6cb3dSEric Biggers sha1msg1 MSG1, MSG0 194*f3d6cb3dSEric Biggers pxor MSG1, MSG3 195*f3d6cb3dSEric Biggers 196*f3d6cb3dSEric Biggers /* Rounds 40-43 */ 197*f3d6cb3dSEric Biggers sha1nexte MSG2, E0 198*f3d6cb3dSEric Biggers movdqa ABCD, E1 199*f3d6cb3dSEric Biggers sha1msg2 MSG2, MSG3 200*f3d6cb3dSEric Biggers sha1rnds4 $2, E0, ABCD 201*f3d6cb3dSEric Biggers sha1msg1 MSG2, MSG1 202*f3d6cb3dSEric Biggers pxor MSG2, MSG0 203*f3d6cb3dSEric Biggers 204*f3d6cb3dSEric Biggers /* Rounds 44-47 */ 205*f3d6cb3dSEric Biggers sha1nexte MSG3, E1 206*f3d6cb3dSEric Biggers movdqa ABCD, E0 207*f3d6cb3dSEric Biggers sha1msg2 MSG3, MSG0 208*f3d6cb3dSEric Biggers sha1rnds4 $2, E1, ABCD 209*f3d6cb3dSEric Biggers sha1msg1 MSG3, MSG2 210*f3d6cb3dSEric Biggers pxor MSG3, MSG1 211*f3d6cb3dSEric Biggers 212*f3d6cb3dSEric Biggers /* Rounds 48-51 */ 213*f3d6cb3dSEric Biggers sha1nexte MSG0, E0 214*f3d6cb3dSEric Biggers movdqa ABCD, E1 215*f3d6cb3dSEric Biggers sha1msg2 MSG0, MSG1 216*f3d6cb3dSEric Biggers sha1rnds4 $2, E0, ABCD 217*f3d6cb3dSEric Biggers sha1msg1 MSG0, MSG3 218*f3d6cb3dSEric Biggers pxor MSG0, MSG2 219*f3d6cb3dSEric Biggers 220*f3d6cb3dSEric Biggers /* Rounds 52-55 */ 221*f3d6cb3dSEric Biggers sha1nexte MSG1, E1 
222*f3d6cb3dSEric Biggers movdqa ABCD, E0 223*f3d6cb3dSEric Biggers sha1msg2 MSG1, MSG2 224*f3d6cb3dSEric Biggers sha1rnds4 $2, E1, ABCD 225*f3d6cb3dSEric Biggers sha1msg1 MSG1, MSG0 226*f3d6cb3dSEric Biggers pxor MSG1, MSG3 227*f3d6cb3dSEric Biggers 228*f3d6cb3dSEric Biggers /* Rounds 56-59 */ 229*f3d6cb3dSEric Biggers sha1nexte MSG2, E0 230*f3d6cb3dSEric Biggers movdqa ABCD, E1 231*f3d6cb3dSEric Biggers sha1msg2 MSG2, MSG3 232*f3d6cb3dSEric Biggers sha1rnds4 $2, E0, ABCD 233*f3d6cb3dSEric Biggers sha1msg1 MSG2, MSG1 234*f3d6cb3dSEric Biggers pxor MSG2, MSG0 235*f3d6cb3dSEric Biggers 236*f3d6cb3dSEric Biggers /* Rounds 60-63 */ 237*f3d6cb3dSEric Biggers sha1nexte MSG3, E1 238*f3d6cb3dSEric Biggers movdqa ABCD, E0 239*f3d6cb3dSEric Biggers sha1msg2 MSG3, MSG0 240*f3d6cb3dSEric Biggers sha1rnds4 $3, E1, ABCD 241*f3d6cb3dSEric Biggers sha1msg1 MSG3, MSG2 242*f3d6cb3dSEric Biggers pxor MSG3, MSG1 243*f3d6cb3dSEric Biggers 244*f3d6cb3dSEric Biggers /* Rounds 64-67 */ 245*f3d6cb3dSEric Biggers sha1nexte MSG0, E0 246*f3d6cb3dSEric Biggers movdqa ABCD, E1 247*f3d6cb3dSEric Biggers sha1msg2 MSG0, MSG1 248*f3d6cb3dSEric Biggers sha1rnds4 $3, E0, ABCD 249*f3d6cb3dSEric Biggers sha1msg1 MSG0, MSG3 250*f3d6cb3dSEric Biggers pxor MSG0, MSG2 251*f3d6cb3dSEric Biggers 252*f3d6cb3dSEric Biggers /* Rounds 68-71 */ 253*f3d6cb3dSEric Biggers sha1nexte MSG1, E1 254*f3d6cb3dSEric Biggers movdqa ABCD, E0 255*f3d6cb3dSEric Biggers sha1msg2 MSG1, MSG2 256*f3d6cb3dSEric Biggers sha1rnds4 $3, E1, ABCD 257*f3d6cb3dSEric Biggers pxor MSG1, MSG3 258*f3d6cb3dSEric Biggers 259*f3d6cb3dSEric Biggers /* Rounds 72-75 */ 260*f3d6cb3dSEric Biggers sha1nexte MSG2, E0 261*f3d6cb3dSEric Biggers movdqa ABCD, E1 262*f3d6cb3dSEric Biggers sha1msg2 MSG2, MSG3 263*f3d6cb3dSEric Biggers sha1rnds4 $3, E0, ABCD 264*f3d6cb3dSEric Biggers 265*f3d6cb3dSEric Biggers /* Rounds 76-79 */ 266*f3d6cb3dSEric Biggers sha1nexte MSG3, E1 267*f3d6cb3dSEric Biggers movdqa ABCD, E0 268*f3d6cb3dSEric Biggers sha1rnds4 $3, E1, 
ABCD 269*f3d6cb3dSEric Biggers 270*f3d6cb3dSEric Biggers /* Add current hash values with previously saved */ 271*f3d6cb3dSEric Biggers sha1nexte (0*16)(%rsp), E0 272*f3d6cb3dSEric Biggers paddd (1*16)(%rsp), ABCD 273*f3d6cb3dSEric Biggers 274*f3d6cb3dSEric Biggers /* Increment data pointer and loop if more to process */ 275*f3d6cb3dSEric Biggers add $64, DATA_PTR 276*f3d6cb3dSEric Biggers cmp NUM_BLKS, DATA_PTR 277*f3d6cb3dSEric Biggers jne .Lloop0 278*f3d6cb3dSEric Biggers 279*f3d6cb3dSEric Biggers /* Write hash values back in the correct order */ 280*f3d6cb3dSEric Biggers pshufd $0x1B, ABCD, ABCD 281*f3d6cb3dSEric Biggers movdqu ABCD, 0*16(DIGEST_PTR) 282*f3d6cb3dSEric Biggers pextrd $3, E0, 1*16(DIGEST_PTR) 283*f3d6cb3dSEric Biggers 284*f3d6cb3dSEric Biggers.Ldone_hash: 285*f3d6cb3dSEric Biggers mov %rbp, %rsp 286*f3d6cb3dSEric Biggers pop %rbp 287*f3d6cb3dSEric Biggers 288*f3d6cb3dSEric Biggers RET 289*f3d6cb3dSEric BiggersSYM_FUNC_END(sha1_ni_transform) 290*f3d6cb3dSEric Biggers 291*f3d6cb3dSEric Biggers.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 292*f3d6cb3dSEric Biggers.align 16 293*f3d6cb3dSEric BiggersPSHUFFLE_BYTE_FLIP_MASK: 294*f3d6cb3dSEric Biggers .octa 0x000102030405060708090a0b0c0d0e0f 295*f3d6cb3dSEric Biggers 296*f3d6cb3dSEric Biggers.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 297*f3d6cb3dSEric Biggers.align 16 298*f3d6cb3dSEric BiggersUPPER_WORD_MASK: 299*f3d6cb3dSEric Biggers .octa 0xFFFFFFFF000000000000000000000000 300