xref: /linux/lib/crypto/x86/sha256-ni-asm.S (revision 4c855d5069ee2edbcf62fafc7f1a5d4cfea1bce1)
/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>
#include <linux/objtool.h>

/*
 * Register aliases (x86-64, AT&T syntax).
 *
 * The three pointer/count aliases are the registers in which
 * sha256_ni_transform() receives its arguments.
 */
#define STATE_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/*
 * Base register for round-constant loads.  sha256_ni_transform() sets it to
 * K256 + 32*4, and do_4rounds addresses constants as (i-32)*4 relative to it,
 * which keeps every displacement in the range -128..112.
 * NOTE(review): presumably chosen so each displacement fits in one signed
 * byte -- confirm.
 */
#define SHA256CONSTANTS	%rax

#define MSG		%xmm0  /* sha256rnds2 implicit operand */
#define STATE0		%xmm1
#define STATE1		%xmm2
/* Four rotating registers, each holding four 32-bit message schedule words */
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
/* Scratch register for the state shuffles and the schedule computation */
#define TMP		%xmm7

/* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
#define SHUF_MASK	%xmm8

/* Copies of STATE0/STATE1 taken at the top of each block iteration */
#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * do_4rounds i, m0, m1, m2, m3
 *
 * Emit the code for four consecutive SHA-256 rounds, i through i+3, together
 * with the interleaved message schedule work.
 *
 *   i   index of the first of the four rounds (a multiple of 4, 0..60 as
 *       invoked from sha256_ni_transform)
 *   m0  register holding the four schedule words consumed by these rounds
 *   m1  register whose next four schedule words are completed here
 *   m2  not referenced by the macro body; it only keeps the four-register
 *       rotation visible at the call sites
 *   m3  register holding the previous four schedule words
 *
 * Uses DATA_PTR, SHA256CONSTANTS, SHUF_MASK, STATE0 and STATE1 as set up by
 * the caller.  Clobbers MSG and TMP.
 */
.macro do_4rounds	i, m0, m1, m2, m3
.if \i < 16
	/* First 16 words come straight from the input block, byte-swapped */
	movdqu		\i*4(DATA_PTR), \m0
	pshufb		SHUF_MASK, \m0
.endif
	/* MSG = four round constants + four schedule words */
	movdqa		(\i-32)*4(SHA256CONSTANTS), MSG
	paddd		\m0, MSG
	/* Two rounds using the low 64 bits of MSG; result lands in STATE1 */
	sha256rnds2	STATE0, STATE1
.if \i >= 12 && \i < 60
	/*
	 * Finish the next four schedule words in m1: add the four words that
	 * straddle m3 and m0 (the m0:m3 pair shifted right by one dword), then
	 * let sha256msg2 complete the computation using m0.
	 */
	movdqa		\m0, TMP
	palignr		$4, \m3, TMP
	paddd		TMP, \m1
	sha256msg2	\m0, \m1
.endif
	/* Bring the high 64 bits of MSG down for the remaining two rounds */
	punpckhqdq	MSG, MSG
	/* Two more rounds; result lands in STATE0, restoring the roles */
	sha256rnds2	STATE1, STATE0
.if \i >= 4 && \i < 52
	/* Start the schedule computation that a later invocation completes */
	sha256msg1	\m0, \m3
.endif
.endm

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 block function
 *
 * This function takes a pointer to the current SHA-256 state, a pointer to the
 * input data, and the number of 64-byte blocks to process.  Once all blocks
 * have been processed, the state is updated with the new state.  This function
 * only processes complete blocks.  State initialization, buffering of partial
 * blocks, and digest finalization is expected to be handled elsewhere.
 *
 * void sha256_ni_transform(struct sha256_block_state *state,
 *			    const u8 *data, size_t nblocks);
 *
 * In:     %rdi = state, %rsi = data, %rdx = nblocks
 * Out:    the 32 bytes at state are overwritten with the updated hash state;
 *         if nblocks is 0 the function returns without touching them
 * Clobb:  %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 *
 * The state and the input data are accessed with unaligned moves (movdqu).
 * The K256 table and the byte-flip mask are read with aligned moves (movdqa).
 */
.text
SYM_FUNC_START(sha256_ni_transform)
	ANNOTATE_NOENDBR	# since this is called only via static_call

	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash		/* nothing to do for 0 blocks */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */

	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* FEBA */
	punpckhqdq	TMP, STATE1			/* DCHG */
	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
	pshufd		$0xB1, STATE1, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	/* Bias the table pointer by 32 words; do_4rounds subtracts it again */
	lea		K256+32*4(%rip), SHA256CONSTANTS

	/* One iteration per 64-byte block; the state stays in registers */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* 4 x 4 x 4 = 64 rounds, rotating the four schedule registers */
.irp i, 0, 16, 32, 48
	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3
	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0
	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1
	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2
.endr

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	movdqa		STATE0, TMP
	punpcklqdq	STATE1, STATE0			/* GHEF */
	punpckhqdq	TMP, STATE1			/* ABCD */
	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
	pshufd		$0x1B, STATE1, STATE1		/* DCBA */

	movdqu		STATE1, 0*16(STATE_PTR)
	movdqu		STATE0, 1*16(STATE_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/*
 * SHA-256 round constants: 64 32-bit words, four per row, so each row is one
 * 16-byte load in do_4rounds.  The table is 64-byte aligned, which makes
 * every row 16-byte aligned as the movdqa loads require.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/*
 * pshufb control that reverses the byte order within each 32-bit lane
 * (result bytes 0..3 take source bytes 3..0, and so on for each dword).
 * do_4rounds applies it to the message words it loads from the input block.
 * It is 16-byte aligned because it is loaded with movdqa.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203