xref: /linux/lib/crypto/x86/sha256-ni-asm.S (revision 13150742b09e720fdf021de14cd2b98b37415a89)
174750aa7SEric Biggers/*
274750aa7SEric Biggers * Intel SHA Extensions optimized implementation of a SHA-256 update function
374750aa7SEric Biggers *
474750aa7SEric Biggers * This file is provided under a dual BSD/GPLv2 license.  When using or
574750aa7SEric Biggers * redistributing this file, you may do so under either license.
674750aa7SEric Biggers *
774750aa7SEric Biggers * GPL LICENSE SUMMARY
874750aa7SEric Biggers *
974750aa7SEric Biggers * Copyright(c) 2015 Intel Corporation.
1074750aa7SEric Biggers *
1174750aa7SEric Biggers * This program is free software; you can redistribute it and/or modify
1274750aa7SEric Biggers * it under the terms of version 2 of the GNU General Public License as
1374750aa7SEric Biggers * published by the Free Software Foundation.
1474750aa7SEric Biggers *
1574750aa7SEric Biggers * This program is distributed in the hope that it will be useful, but
1674750aa7SEric Biggers * WITHOUT ANY WARRANTY; without even the implied warranty of
1774750aa7SEric Biggers * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1874750aa7SEric Biggers * General Public License for more details.
1974750aa7SEric Biggers *
2074750aa7SEric Biggers * Contact Information:
2174750aa7SEric Biggers * 	Sean Gulley <sean.m.gulley@intel.com>
2274750aa7SEric Biggers * 	Tim Chen <tim.c.chen@linux.intel.com>
2374750aa7SEric Biggers *
2474750aa7SEric Biggers * BSD LICENSE
2574750aa7SEric Biggers *
2674750aa7SEric Biggers * Copyright(c) 2015 Intel Corporation.
2774750aa7SEric Biggers *
2874750aa7SEric Biggers * Redistribution and use in source and binary forms, with or without
2974750aa7SEric Biggers * modification, are permitted provided that the following conditions
3074750aa7SEric Biggers * are met:
3174750aa7SEric Biggers *
3274750aa7SEric Biggers * 	* Redistributions of source code must retain the above copyright
3374750aa7SEric Biggers * 	  notice, this list of conditions and the following disclaimer.
3474750aa7SEric Biggers * 	* Redistributions in binary form must reproduce the above copyright
3574750aa7SEric Biggers * 	  notice, this list of conditions and the following disclaimer in
3674750aa7SEric Biggers * 	  the documentation and/or other materials provided with the
3774750aa7SEric Biggers * 	  distribution.
3874750aa7SEric Biggers * 	* Neither the name of Intel Corporation nor the names of its
3974750aa7SEric Biggers * 	  contributors may be used to endorse or promote products derived
4074750aa7SEric Biggers * 	  from this software without specific prior written permission.
4174750aa7SEric Biggers *
4274750aa7SEric Biggers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
4374750aa7SEric Biggers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
4474750aa7SEric Biggers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
4574750aa7SEric Biggers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
4674750aa7SEric Biggers * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4774750aa7SEric Biggers * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
4874750aa7SEric Biggers * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
4974750aa7SEric Biggers * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
5074750aa7SEric Biggers * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
5174750aa7SEric Biggers * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
5274750aa7SEric Biggers * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5374750aa7SEric Biggers *
5474750aa7SEric Biggers */
5574750aa7SEric Biggers
5674750aa7SEric Biggers#include <linux/linkage.h>
5774750aa7SEric Biggers
/*
 * Register roles used throughout this file (AT&T operand order).
 *
 * STATE_PTR is only read as an address.  DATA_PTR and NUM_BLKS are
 * overwritten by sha256_ni_transform: DATA_PTR advances by 64 per block and
 * NUM_BLKS is turned into the end-of-data pointer.
 */
5874750aa7SEric Biggers#define STATE_PTR	%rdi	/* 1st arg */
5974750aa7SEric Biggers#define DATA_PTR	%rsi	/* 2nd arg */
6074750aa7SEric Biggers#define NUM_BLKS	%rdx	/* 3rd arg */
6174750aa7SEric Biggers
/*
 * Set to K256+32*4, i.e. 32 dwords into the constant table.  do_4rounds
 * compensates with (\i-32)*4, which for \i = 0..60 gives displacements from
 * -128 to +112 (presumably chosen so that each one fits in a signed byte).
 */
6274750aa7SEric Biggers#define SHA256CONSTANTS	%rax
6374750aa7SEric Biggers
/*
 * MSG carries message words plus round constants into sha256rnds2.
 * STATE0/STATE1 hold the working state as ABEF/CDGH.
 * MSG0-MSG3 hold four groups of four message-schedule words each.
 * TMP is scratch.
 */
6474750aa7SEric Biggers#define MSG		%xmm0  /* sha256rnds2 implicit operand */
6574750aa7SEric Biggers#define STATE0		%xmm1
6674750aa7SEric Biggers#define STATE1		%xmm2
6774750aa7SEric Biggers#define MSG0		%xmm3
6874750aa7SEric Biggers#define MSG1		%xmm4
6974750aa7SEric Biggers#define MSG2		%xmm5
7074750aa7SEric Biggers#define MSG3		%xmm6
7174750aa7SEric Biggers#define TMP		%xmm7
7274750aa7SEric Biggers
/* Holds PSHUFFLE_BYTE_FLIP_MASK, loaded once before the block loop */
7374750aa7SEric Biggers#define SHUF_MASK	%xmm8
7474750aa7SEric Biggers
/* Copies of STATE0/STATE1 taken at the top of each block iteration */
7574750aa7SEric Biggers#define ABEF_SAVE	%xmm9
7674750aa7SEric Biggers#define CDGH_SAVE	%xmm10
7774750aa7SEric Biggers
/*
 * do_4rounds - emit the instructions for four SHA-256 rounds, \i to \i+3.
 *
 * i:   number of the first round of the group.  The invocations in this
 *      file pass the multiples of 4 from 0 to 60.
 * m0:  message register for this group.  When \i < 16 it is loaded here
 *      from the input block and byte-swapped; otherwise it must already
 *      hold the scheduled words for rounds \i..\i+3.
 * m1:  register being built into the words for rounds \i+4..\i+7; it is
 *      completed here when 12 <= \i < 60.
 * m2:  not referenced by the macro body.
 * m3:  words of the previous group (\i-4..\i-1).  When 4 <= \i < 52 it is
 *      overwritten at the end with the sha256msg1 partial result, i.e. the
 *      start of the words for rounds \i+12..\i+15.
 *
 * Scratch: MSG and TMP.  Expects DATA_PTR, SHUF_MASK, SHA256CONSTANTS,
 * STATE0 and STATE1 to have been set up by the caller.  The first
 * sha256rnds2 uses STATE0 as source and STATE1 as destination, the second
 * one the reverse, so the two registers have the same roles on exit as on
 * entry.
 */
7874750aa7SEric Biggers.macro do_4rounds	i, m0, m1, m2, m3
/* Rounds 0-15 take their words straight from the input block */
7974750aa7SEric Biggers.if \i < 16
8074750aa7SEric Biggers	movdqu		\i*4(DATA_PTR), \m0
8174750aa7SEric Biggers	pshufb		SHUF_MASK, \m0
8274750aa7SEric Biggers.endif
/* MSG = four round constants starting at index \i, plus the four words */
8374750aa7SEric Biggers	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
8474750aa7SEric Biggers	paddd		\m0, MSG
/* Two rounds, fed from the low half of MSG */
8574750aa7SEric Biggers	sha256rnds2	STATE0, STATE1
8674750aa7SEric Biggers.if \i >= 12 && \i < 60
/*
 * Finish the next group in \m1: add TMP, which is the top three dwords of
 * \m3 followed by the lowest dword of \m0, then apply sha256msg2.
 */
8774750aa7SEric Biggers	movdqa		\m0, TMP
8874750aa7SEric Biggers	palignr		$4, \m3, TMP
8974750aa7SEric Biggers	paddd		TMP, \m1
9074750aa7SEric Biggers	sha256msg2	\m0, \m1
9174750aa7SEric Biggers.endif
/* Copy the high half of MSG over the low half for the next two rounds */
9274750aa7SEric Biggers	punpckhqdq	MSG, MSG
9374750aa7SEric Biggers	sha256rnds2	STATE1, STATE0
9474750aa7SEric Biggers.if \i >= 4 && \i < 52
/* \m3 is no longer needed as-is; start the group three steps ahead in it */
9574750aa7SEric Biggers	sha256msg1	\m0, \m3
9674750aa7SEric Biggers.endif
9774750aa7SEric Biggers.endm
9874750aa7SEric Biggers
9974750aa7SEric Biggers/*
10074750aa7SEric Biggers * Intel SHA Extensions optimized implementation of a SHA-256 block function
10174750aa7SEric Biggers *
10274750aa7SEric Biggers * This function takes a pointer to the current SHA-256 state, a pointer to the
10374750aa7SEric Biggers * input data, and the number of 64-byte blocks to process.  Once all blocks
10474750aa7SEric Biggers * have been processed, the state is updated with the new state.  This function
10574750aa7SEric Biggers * only processes complete blocks.  State initialization, buffering of partial
10674750aa7SEric Biggers * blocks, and digest finalization is expected to be handled elsewhere.
10774750aa7SEric Biggers *
108*4c855d50SEric Biggers * void sha256_ni_transform(struct sha256_block_state *state,
10974750aa7SEric Biggers *			    const u8 *data, size_t nblocks);
 *
 * In:       STATE_PTR (%rdi) = state; 32 bytes are read and written back as
 *                              two 16-byte halves
 *           DATA_PTR  (%rsi) = data
 *           NUM_BLKS  (%rdx) = nblocks
 * Modifies: %rax, %rdx, %rsi, %xmm0-%xmm10 and the flags.
 *
 * nblocks must be nonzero: the block loop compares the data pointer with the
 * end pointer only after a block has been processed, and leaves the loop only
 * when the two are equal.
 *
 * The state and the input data are accessed with movdqu, so this code places
 * no alignment requirement on either pointer.
 *
 * The lane comments below (DCBA, ABEF, ...) list the four 32-bit words of a
 * register from the most significant to the least significant.
11074750aa7SEric Biggers */
11174750aa7SEric Biggers.text
11274750aa7SEric BiggersSYM_FUNC_START(sha256_ni_transform)
11374750aa7SEric Biggers
	/* From here on NUM_BLKS is an address, not a count */
11474750aa7SEric Biggers	shl		$6, NUM_BLKS		/*  convert to bytes */
11574750aa7SEric Biggers	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
11674750aa7SEric Biggers
11774750aa7SEric Biggers	/*
11874750aa7SEric Biggers	 * load initial hash values
11974750aa7SEric Biggers	 * Need to reorder these appropriately
12074750aa7SEric Biggers	 * DCBA, HGFE -> ABEF, CDGH
12174750aa7SEric Biggers	 */
12274750aa7SEric Biggers	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
12374750aa7SEric Biggers	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */
12474750aa7SEric Biggers
	/*
	 * pshufd $0x1B reverses the four dwords; $0xB1 swaps the two dwords
	 * inside each qword.
	 */
12574750aa7SEric Biggers	movdqa		STATE0, TMP
12674750aa7SEric Biggers	punpcklqdq	STATE1, STATE0			/* FEBA */
12774750aa7SEric Biggers	punpckhqdq	TMP, STATE1			/* DCHG */
12874750aa7SEric Biggers	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
12974750aa7SEric Biggers	pshufd		$0xB1, STATE1, STATE1		/* CDGH */
13074750aa7SEric Biggers
	/*
	 * Loop-invariant setup.  Both loads of file-local data use aligned
	 * accesses (here and in do_4rounds) and rely on the .align directives
	 * of the two data sections.  The +32*4 bias is undone by the
	 * (\i-32)*4 displacement in do_4rounds.
	 */
13174750aa7SEric Biggers	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
13274750aa7SEric Biggers	lea		K256+32*4(%rip), SHA256CONSTANTS
13374750aa7SEric Biggers
13474750aa7SEric Biggers.Lloop0:
13574750aa7SEric Biggers	/* Save hash values for addition after rounds */
13674750aa7SEric Biggers	movdqa		STATE0, ABEF_SAVE
13774750aa7SEric Biggers	movdqa		STATE1, CDGH_SAVE
13874750aa7SEric Biggers
	/*
	 * 64 rounds: four passes of four do_4rounds invocations.  The message
	 * register arguments rotate by one position per invocation.
	 */
13974750aa7SEric Biggers.irp i, 0, 16, 32, 48
14074750aa7SEric Biggers	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
14174750aa7SEric Biggers	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
14274750aa7SEric Biggers	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
14374750aa7SEric Biggers	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
14474750aa7SEric Biggers.endr
14574750aa7SEric Biggers
14674750aa7SEric Biggers	/* Add current hash values with previously saved */
14774750aa7SEric Biggers	paddd		ABEF_SAVE, STATE0
14874750aa7SEric Biggers	paddd		CDGH_SAVE, STATE1
14974750aa7SEric Biggers
15074750aa7SEric Biggers	/* Increment data pointer and loop if more to process */
15174750aa7SEric Biggers	add		$64, DATA_PTR
15274750aa7SEric Biggers	cmp		NUM_BLKS, DATA_PTR
15374750aa7SEric Biggers	jne		.Lloop0
15474750aa7SEric Biggers
15574750aa7SEric Biggers	/* Write hash values back in the correct order */
15674750aa7SEric Biggers	movdqa		STATE0, TMP
15774750aa7SEric Biggers	punpcklqdq	STATE1, STATE0			/* GHEF */
15874750aa7SEric Biggers	punpckhqdq	TMP, STATE1			/* ABCD */
15974750aa7SEric Biggers	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
16074750aa7SEric Biggers	pshufd		$0x1B, STATE1, STATE1		/* DCBA */
16174750aa7SEric Biggers
	/* After the shuffles STATE1 holds the first half, STATE0 the second */
16274750aa7SEric Biggers	movdqu		STATE1, 0*16(STATE_PTR)
16374750aa7SEric Biggers	movdqu		STATE0, 1*16(STATE_PTR)
16474750aa7SEric Biggers
16574750aa7SEric Biggers	RET
16674750aa7SEric BiggersSYM_FUNC_END(sha256_ni_transform)
16774750aa7SEric Biggers
/*
 * SHA-256 round constants: 64 32-bit words laid out as 16 rows of four,
 * 256 bytes in total, which matches the entity size given for this
 * mergeable ("aM") section.  do_4rounds reads one row per invocation with
 * movdqa through SHA256CONSTANTS (= K256+32*4); that access needs 16-byte
 * alignment, which the 64-byte alignment below provides.
 */
16874750aa7SEric Biggers.section	.rodata.cst256.K256, "aM", @progbits, 256
16974750aa7SEric Biggers.align 64
17074750aa7SEric BiggersK256:
17174750aa7SEric Biggers	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
17274750aa7SEric Biggers	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
17374750aa7SEric Biggers	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
17474750aa7SEric Biggers	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
17574750aa7SEric Biggers	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
17674750aa7SEric Biggers	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
17774750aa7SEric Biggers	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
17874750aa7SEric Biggers	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
17974750aa7SEric Biggers	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
18074750aa7SEric Biggers	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
18174750aa7SEric Biggers	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
18274750aa7SEric Biggers	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
18374750aa7SEric Biggers	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
18474750aa7SEric Biggers	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
18574750aa7SEric Biggers	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
18674750aa7SEric Biggers	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
18774750aa7SEric Biggers
/*
 * pshufb control used by do_4rounds on each 16 bytes of input.  Read from
 * the lowest byte up, the selectors are 3,2,1,0, 7,6,5,4, 11,10,9,8,
 * 15,14,13,12, so the byte order inside each 32-bit word is reversed while
 * the order of the words is kept.  Loaded with movdqa, hence the 16-byte
 * alignment.
 */
18874750aa7SEric Biggers.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
18974750aa7SEric Biggers.align 16
19074750aa7SEric BiggersPSHUFFLE_BYTE_FLIP_MASK:
19174750aa7SEric Biggers	.octa 0x0c0d0e0f08090a0b0405060700010203
192