xref: /linux/lib/crypto/x86/sha1-ni-asm.S (revision 13150742b09e720fdf021de14cd2b98b37415a89)
1f3d6cb3dSEric Biggers/*
2f3d6cb3dSEric Biggers * Intel SHA Extensions optimized implementation of a SHA-1 update function
3f3d6cb3dSEric Biggers *
4f3d6cb3dSEric Biggers * This file is provided under a dual BSD/GPLv2 license.  When using or
5f3d6cb3dSEric Biggers * redistributing this file, you may do so under either license.
6f3d6cb3dSEric Biggers *
7f3d6cb3dSEric Biggers * GPL LICENSE SUMMARY
8f3d6cb3dSEric Biggers *
9f3d6cb3dSEric Biggers * Copyright(c) 2015 Intel Corporation.
10f3d6cb3dSEric Biggers *
11f3d6cb3dSEric Biggers * This program is free software; you can redistribute it and/or modify
12f3d6cb3dSEric Biggers * it under the terms of version 2 of the GNU General Public License as
13f3d6cb3dSEric Biggers * published by the Free Software Foundation.
14f3d6cb3dSEric Biggers *
15f3d6cb3dSEric Biggers * This program is distributed in the hope that it will be useful, but
16f3d6cb3dSEric Biggers * WITHOUT ANY WARRANTY; without even the implied warranty of
17f3d6cb3dSEric Biggers * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18f3d6cb3dSEric Biggers * General Public License for more details.
19f3d6cb3dSEric Biggers *
20f3d6cb3dSEric Biggers * Contact Information:
21f3d6cb3dSEric Biggers * 	Sean Gulley <sean.m.gulley@intel.com>
22f3d6cb3dSEric Biggers * 	Tim Chen <tim.c.chen@linux.intel.com>
23f3d6cb3dSEric Biggers *
24f3d6cb3dSEric Biggers * BSD LICENSE
25f3d6cb3dSEric Biggers *
26f3d6cb3dSEric Biggers * Copyright(c) 2015 Intel Corporation.
27f3d6cb3dSEric Biggers *
28f3d6cb3dSEric Biggers * Redistribution and use in source and binary forms, with or without
29f3d6cb3dSEric Biggers * modification, are permitted provided that the following conditions
30f3d6cb3dSEric Biggers * are met:
31f3d6cb3dSEric Biggers *
32f3d6cb3dSEric Biggers * 	* Redistributions of source code must retain the above copyright
33f3d6cb3dSEric Biggers * 	  notice, this list of conditions and the following disclaimer.
34f3d6cb3dSEric Biggers * 	* Redistributions in binary form must reproduce the above copyright
35f3d6cb3dSEric Biggers * 	  notice, this list of conditions and the following disclaimer in
36f3d6cb3dSEric Biggers * 	  the documentation and/or other materials provided with the
37f3d6cb3dSEric Biggers * 	  distribution.
38f3d6cb3dSEric Biggers * 	* Neither the name of Intel Corporation nor the names of its
39f3d6cb3dSEric Biggers * 	  contributors may be used to endorse or promote products derived
40f3d6cb3dSEric Biggers * 	  from this software without specific prior written permission.
41f3d6cb3dSEric Biggers *
42f3d6cb3dSEric Biggers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43f3d6cb3dSEric Biggers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44f3d6cb3dSEric Biggers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45f3d6cb3dSEric Biggers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46f3d6cb3dSEric Biggers * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47f3d6cb3dSEric Biggers * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48f3d6cb3dSEric Biggers * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49f3d6cb3dSEric Biggers * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50f3d6cb3dSEric Biggers * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51f3d6cb3dSEric Biggers * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52f3d6cb3dSEric Biggers * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53f3d6cb3dSEric Biggers *
54f3d6cb3dSEric Biggers */
55f3d6cb3dSEric Biggers
56f3d6cb3dSEric Biggers#include <linux/linkage.h>
57f3d6cb3dSEric Biggers
/*
 * Register roles.
 *
 * STATE_PTR, DATA_PTR and NUM_BLKS name the registers that carry the three
 * arguments of sha1_ni_transform().  DATA_PTR and NUM_BLKS are modified as
 * the function walks the input; STATE_PTR is only used as a base address.
 */
58f88ed14aSEric Biggers#define STATE_PTR	%rdi	/* 1st arg */
59f3d6cb3dSEric Biggers#define DATA_PTR	%rsi	/* 2nd arg */
60f3d6cb3dSEric Biggers#define NUM_BLKS	%rdx	/* 3rd arg */
61f3d6cb3dSEric Biggers
/*
 * XMM working registers.
 *
 * ABCD       - the 16 bytes loaded from offset 0 of the state, updated in
 *              place by every group of four rounds.
 * E0, E1     - alternate between two jobs: one is the E input of the current
 *              group of four rounds while the other receives a copy of ABCD
 *              for use by the next group.
 * MSG0-MSG3  - the four 16-byte quarters of the message block / message
 *              schedule; their roles rotate from one group of rounds to the
 *              next.
 * SHUF_MASK  - holds PSHUFFLE_BYTE_FLIP_MASK for the pshufb byte reversal.
 * ABCD_SAVED, E0_SAVED - copies of ABCD and E0 taken at the start of each
 *              block and added back in after the block's rounds.
 */
62f3d6cb3dSEric Biggers#define ABCD		%xmm0
63f3d6cb3dSEric Biggers#define E0		%xmm1	/* Need two E's b/c they ping pong */
64f3d6cb3dSEric Biggers#define E1		%xmm2
65f3d6cb3dSEric Biggers#define MSG0		%xmm3
66f3d6cb3dSEric Biggers#define MSG1		%xmm4
67f3d6cb3dSEric Biggers#define MSG2		%xmm5
68f3d6cb3dSEric Biggers#define MSG3		%xmm6
69f3d6cb3dSEric Biggers#define SHUF_MASK	%xmm7
70f88ed14aSEric Biggers#define ABCD_SAVED	%xmm8
71f88ed14aSEric Biggers#define E0_SAVED	%xmm9
72f3d6cb3dSEric Biggers
/*
 * do_4rounds - emit the code for four consecutive SHA-1 rounds
 *
 * i:   index of the first of the four rounds.  The caller passes every
 *      multiple of 4 from 0 to 76.  It decides which optional steps are
 *      emitted and supplies the immediate for sha1rnds4.
 * m0:  message register consumed by these rounds.  For i < 16 it is loaded
 *      here from byte offset i*4 of the input block; for later rounds it
 *      must already hold the schedule words prepared by earlier invocations.
 * m1:  register that will be m0 of the next invocation (rounds i+4..i+7).
 * m2:  register that will be m0 two invocations later (rounds i+8..i+11).
 * m3:  register that will be m0 three invocations later (rounds i+12..i+15).
 * e0:  in: for i == 0, the E state value; otherwise the copy of ABCD that
 *      the previous invocation left in its e1.  Overwritten.
 * e1:  out: receives a copy of ABCD as it was before these four rounds.
 *
 * Implicit operands: reads DATA_PTR and SHUF_MASK, reads and writes ABCD.
 *
 * The message-schedule steps (sha1msg1, pxor, sha1msg2) for one group of
 * four future words are spread over three successive invocations; each .if
 * guard drops the step when the words it would prepare lie outside rounds
 * 16..79.
 */
73*42e3376eSEric Biggers.macro do_4rounds	i, m0, m1, m2, m3, e0, e1
	/*
	 * Rounds 0..15 take their message words directly from the input:
	 * unaligned 16-byte load, then reverse all 16 bytes via SHUF_MASK.
	 */
74*42e3376eSEric Biggers.if \i < 16
75*42e3376eSEric Biggers	movdqu		\i*4(DATA_PTR), \m0
76*42e3376eSEric Biggers	pshufb		SHUF_MASK, \m0
77*42e3376eSEric Biggers.endif
	/*
	 * Combine E with the message words, leaving the result in e0 as the
	 * second source of sha1rnds4.  In the first invocation e0 holds E
	 * itself, so a plain dword add is used; afterwards e0 holds a saved
	 * ABCD and sha1nexte derives E from it before adding.
	 */
78*42e3376eSEric Biggers.if \i == 0
79*42e3376eSEric Biggers	paddd		\m0, \e0
80*42e3376eSEric Biggers.else
81*42e3376eSEric Biggers	sha1nexte	\m0, \e0
82*42e3376eSEric Biggers.endif
	/* Snapshot ABCD before the rounds change it; this is the next e0. */
83*42e3376eSEric Biggers	movdqa		ABCD, \e1
	/* Finish the schedule words that rounds i+4..i+7 will consume. */
84*42e3376eSEric Biggers.if \i >= 12 && \i < 76
85*42e3376eSEric Biggers	sha1msg2	\m0, \m1
86*42e3376eSEric Biggers.endif
	/*
	 * Perform the four rounds.  The immediate i / 20 evaluates to 0..3,
	 * one value per group of 20 rounds.
	 */
87*42e3376eSEric Biggers	sha1rnds4	$\i / 20, \e0, ABCD
	/* Start the schedule words that rounds i+12..i+15 will consume. */
88*42e3376eSEric Biggers.if \i >= 4 && \i < 68
89*42e3376eSEric Biggers	sha1msg1	\m0, \m3
90*42e3376eSEric Biggers.endif
	/* Continue the schedule words that rounds i+8..i+11 will consume. */
91*42e3376eSEric Biggers.if \i >= 8 && \i < 72
92*42e3376eSEric Biggers	pxor		\m0, \m2
93*42e3376eSEric Biggers.endif
94*42e3376eSEric Biggers.endm
95*42e3376eSEric Biggers
96f3d6cb3dSEric Biggers/*
97f3d6cb3dSEric Biggers * Intel SHA Extensions optimized implementation of a SHA-1 block function
98f3d6cb3dSEric Biggers *
99f3d6cb3dSEric Biggers * This function takes a pointer to the current SHA-1 state, a pointer to the
100f88ed14aSEric Biggers * input data, and the number of 64-byte blocks to process.  The number of
101f88ed14aSEric Biggers * blocks to process is assumed to be nonzero.  Once all blocks have been
102f88ed14aSEric Biggers * processed, the state is updated with the new state.  This function only
103f88ed14aSEric Biggers * processes complete blocks.  State initialization, buffering of partial
104f3d6cb3dSEric Biggers * blocks, and digest finalization are expected to be handled elsewhere.
105f3d6cb3dSEric Biggers *
106f3d6cb3dSEric Biggers * void sha1_ni_transform(struct sha1_block_state *state,
107f3d6cb3dSEric Biggers *			  const u8 *data, size_t nblocks)
 *
 * How the arguments are used:
 *   state   (STATE_PTR): 20 bytes are accessed, 16 at offset 0 and 4 at
 *           offset 16.  They are read once on entry and written once on
 *           exit; between the two the state lives only in registers.
 *   data    (DATA_PTR):  64 bytes are read per block, using unaligned loads.
 *   nblocks (NUM_BLKS):  loop counter.  It is decremented and tested only
 *           after a block has been processed, which is why zero is not a
 *           valid value.
 *
 * Registers modified: %rsi, %rdx, %xmm0-%xmm9 and the flags.  No stack
 * memory is touched by the instructions in this function body.
108f3d6cb3dSEric Biggers */
109f3d6cb3dSEric Biggers.text
110f3d6cb3dSEric BiggersSYM_FUNC_START(sha1_ni_transform)
111f3d6cb3dSEric Biggers
112f88ed14aSEric Biggers	/* Load the initial state from STATE_PTR. */
	/*
	 * The dword at offset 16 goes into the highest dword of E0, with the
	 * three lower dwords zeroed.  The 16 bytes at offset 0 go into ABCD
	 * and pshufd $0x1B then reverses the order of its four dwords.
	 */
113f88ed14aSEric Biggers	pxor		E0, E0
114f88ed14aSEric Biggers	pinsrd		$3, 16(STATE_PTR), E0
115f88ed14aSEric Biggers	movdqu		(STATE_PTR), ABCD
116f3d6cb3dSEric Biggers	pshufd		$0x1B, ABCD, ABCD
117f3d6cb3dSEric Biggers
	/*
	 * Loop-invariant, so loaded once.  movdqa requires an aligned
	 * operand; the constant is defined with 16-byte alignment.
	 */
118f3d6cb3dSEric Biggers	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
119f3d6cb3dSEric Biggers
120f88ed14aSEric Biggers.Lnext_block:
121f88ed14aSEric Biggers	/* Save the state for addition after the rounds. */
122f88ed14aSEric Biggers	movdqa		E0, E0_SAVED
123f88ed14aSEric Biggers	movdqa		ABCD, ABCD_SAVED
124f3d6cb3dSEric Biggers
	/*
	 * 80 rounds: 5 iterations x 4 invocations x 4 rounds, i.e. i takes
	 * the values 0, 4, ..., 76.  From one invocation to the next the MSG
	 * registers rotate by one position and E0/E1 swap roles, so after
	 * each group of four invocations the assignment is back where it
	 * started.  The last invocation leaves its ABCD snapshot in E0.
	 */
125*42e3376eSEric Biggers.irp i, 0, 16, 32, 48, 64
126*42e3376eSEric Biggers	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3, E0, E1
127*42e3376eSEric Biggers	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0, E1, E0
128*42e3376eSEric Biggers	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1, E0, E1
129*42e3376eSEric Biggers	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2, E1, E0
130*42e3376eSEric Biggers.endr
131f3d6cb3dSEric Biggers
132f88ed14aSEric Biggers	/* Add the previous state (before the rounds) to the current state. */
	/*
	 * E0 still holds an ABCD snapshot rather than E, so sha1nexte is
	 * used: it derives E from the snapshot and adds the saved E to it.
	 * ABCD is a plain per-dword add.
	 */
133f88ed14aSEric Biggers	sha1nexte	E0_SAVED, E0
134f88ed14aSEric Biggers	paddd		ABCD_SAVED, ABCD
135f3d6cb3dSEric Biggers
136f88ed14aSEric Biggers	/* Advance to the next block, or break if there are no more blocks. */
137f3d6cb3dSEric Biggers	add		$64, DATA_PTR
138f88ed14aSEric Biggers	dec		NUM_BLKS
139f88ed14aSEric Biggers	jnz		.Lnext_block
140f3d6cb3dSEric Biggers
141f88ed14aSEric Biggers	/* Store the new state to STATE_PTR. */
	/* Mirror of the load: highest dword of E0, then ABCD un-reversed. */
142f88ed14aSEric Biggers	pextrd		$3, E0, 16(STATE_PTR)
143f3d6cb3dSEric Biggers	pshufd		$0x1B, ABCD, ABCD
144f88ed14aSEric Biggers	movdqu		ABCD, (STATE_PTR)
145f3d6cb3dSEric Biggers
146f3d6cb3dSEric Biggers	RET
147f3d6cb3dSEric BiggersSYM_FUNC_END(sha1_ni_transform)
148f3d6cb3dSEric Biggers
/*
 * pshufb control mask that reverses the order of all 16 bytes of an XMM
 * register.  The 128-bit constant is stored least-significant byte first, so
 * control byte n has the value 15 - n and destination byte n is taken from
 * source byte 15 - n.
 *
 * The 16-byte alignment is required because the constant is loaded with
 * movdqa.
 */
149f3d6cb3dSEric Biggers.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
150f3d6cb3dSEric Biggers.align 16
151f3d6cb3dSEric BiggersPSHUFFLE_BYTE_FLIP_MASK:
152f3d6cb3dSEric Biggers	.octa 0x000102030405060708090a0b0c0d0e0f
153