/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *   Sean Gulley <sean.m.gulley@intel.com>
 *   Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last),
 * preprocessed by cpp.
 */

/* Function arguments.  DATA_PTR and NUM_BLKS are modified by the loop. */
#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* SIMD register roles.  All of %xmm0-%xmm9 are overwritten. */
#define ABCD		%xmm0
#define E0		%xmm1	/* Need two E's b/c they ping pong */
#define E1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7
#define ABCD_SAVED	%xmm8
#define E0_SAVED	%xmm9

/*
 * do_4rounds - emit the instructions for SHA-1 rounds \i through \i+3.
 *
 * Arguments:
 *   i		First round of the group.  Must be an assemble-time constant;
 *		this file invokes the macro with 0, 4, 8, ..., 76.
 *   m0		Message register consumed by this group.  For \i < 16 it is
 *		first loaded from the input block.
 *   m1, m2, m3	The remaining message registers, in the order in which later
 *		groups will consume them.  They are written only by the
 *		message schedule steps.
 *   e0		E input of this group.
 *   e1		Receives a copy of ABCD taken before the rounds.  The next
 *		group passes this register as its e0.
 *
 * Steps emitted, in order:
 *   1. \i < 16: load 16 bytes from DATA_PTR + \i*4 into \m0 and reverse
 *      their byte order with SHUF_MASK.
 *   2. Combine \m0 with \e0.  For \i == 0, E itself sits in the top dword of
 *      \e0 (other dwords zero), so a plain paddd is used.  Otherwise \e0
 *      holds an older copy of ABCD and sha1nexte is used, which per the
 *      Intel SDM derives E from that copy before adding.
 *   3. Copy ABCD to \e1.
 *   4. sha1rnds4 with immediate \i / 20 (0..3), which per the Intel SDM
 *      selects the round function and constant for rounds 0-19, 20-39,
 *      40-59 and 60-79.
 *   5. Message schedule: sha1msg2 into \m1, sha1msg1 into \m3 and pxor into
 *      \m2, each folding \m0 into a register used by a later group.  The
 *      .if range checks skip a step near the start, where its destination
 *      has not been loaded yet, and near the end, where its result would
 *      belong to rounds past 79.
 *
 * Clobbers: \m0-\m3 (as described above), \e0, \e1, ABCD.
 */
.macro do_4rounds	i, m0, m1, m2, m3, e0, e1
.if \i < 16
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
.if \i == 0
	paddd		\m0, \e0
.else
	sha1nexte	\m0, \e0
.endif
	movdqa		ABCD, \e1
.if \i >= 12 && \i < 76
	sha1msg2	\m0, \m1
.endif
	sha1rnds4	$\i / 20, \e0, ABCD
.if \i >= 4 && \i < 68
	sha1msg1	\m0, \m3
.endif
.if \i >= 8 && \i < 72
	pxor		\m0, \m2
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-1 block function
 *
 * This function takes a pointer to the current SHA-1 state, a pointer to the
 * input data, and the number of 64-byte blocks to process.  The number of
 * blocks to process is assumed to be nonzero.  Once all blocks have been
 * processed, the state is updated with the new state.  This function only
 * processes complete blocks.  State initialization, buffering of partial
 * blocks, and digest finalization are expected to be handled elsewhere.
 *
 * void sha1_ni_transform(struct sha1_block_state *state,
 *			  const u8 *data, size_t nblocks)
 *
 * In:       %rdi = state.  20 bytes are read and written back: four dwords
 *                  at offset 0 and one dword at offset 16.
 *           %rsi = data.  Read with unaligned loads, 64 bytes per block.
 *           %rdx = nblocks.  The loop only tests the count after a block has
 *                  been processed, so a count of zero is not handled.
 * Out:      Nothing is returned in a register; *state is updated in place.
 * Clobbers: %rsi, %rdx, %xmm0-%xmm9, flags.
 * Stack:    Not used.
 */
.text
SYM_FUNC_START(sha1_ni_transform)

	/*
	 * Load the initial state from STATE_PTR.  The dword at offset 16 goes
	 * into the top dword of E0 with the other dwords zeroed.  The four
	 * dwords at offset 0 are loaded into ABCD and pshufd $0x1B reverses
	 * their order.
	 */
	pxor		E0, E0
	pinsrd		$3, 16(STATE_PTR), E0
	movdqu		(STATE_PTR), ABCD
	pshufd		$0x1B, ABCD, ABCD

	/* Aligned load: the constant is placed with .align 16 below. */
	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lnext_block:
	/* Save the state for addition after the rounds. */
	movdqa		E0, E0_SAVED
	movdqa		ABCD, ABCD_SAVED

	/*
	 * Each .irp iteration expands to 16 rounds, so the five iterations
	 * cover rounds 0-79.  The message registers rotate by one position
	 * and E0/E1 swap roles from one group of four rounds to the next.
	 */
.irp i, 0, 16, 32, 48, 64
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3, E0, E1
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0, E1, E0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1, E0, E1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2, E1, E0
.endr

	/*
	 * Add the previous state (before the rounds) to the current state.
	 * E0 holds the ABCD copy made by the last group, so sha1nexte is used
	 * to derive the final E from it while adding the saved E.
	 */
	sha1nexte	E0_SAVED, E0
	paddd		ABCD_SAVED, ABCD

	/* Advance to the next block, or break if there are no more blocks. */
	add		$64, DATA_PTR
	dec		NUM_BLKS
	jnz		.Lnext_block

	/*
	 * Store the new state to STATE_PTR, undoing the dword reversal that
	 * was applied to ABCD on load.
	 */
	pextrd		$3, E0, 16(STATE_PTR)
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, (STATE_PTR)

	RET
SYM_FUNC_END(sha1_ni_transform)

/*
 * pshufb control mask that reverses the order of all 16 bytes of a register.
 * .octa emits the value little-endian, so mask byte 0 is 0x0f and mask byte
 * 15 is 0x00.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f