#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for s390x.
#
# June 2015
#
# ~6.6/2.3 cpb on z10/z196+, >2x improvement over compiler-generated
# code. For older compiler improvement coefficient is >3x, because
# then base 2^64 and base 2^32 implementations are compared.
#
# On side note, z13 enables vector base 2^26 implementation...

# The first command-line argument selects the ABI flavour.  A flavour
# matching /3[12]/ gets a 4-byte $SIZE_T and an empty $g; anything else
# gets an 8-byte $SIZE_T and $g="g".  $g is spliced into mnemonics below
# (e.g. stm${g}, cl${g}r, brct$g) and $SIZE_T scales stack-slot offsets.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume remaining arguments until one looks like a file name
# ("name.ext"); that argument becomes the output file.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually supplied, and
# fail loudly if it cannot be created instead of silently carrying on.
# Without an output file the generated code goes to the inherited STDOUT.
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}

$sp="%r15";	# stack pointer register used for all save-area offsets

# Registers %r2..%r5, named after the arguments they carry in the
# routines generated below.
my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5));

# poly1305_init: emitted assembly for context initialisation.
#
#   $ctx  - context; bytes 0..23 hold the hash value and are zeroed,
#           bytes 32..47 receive the clamped first half of the key.
#   $inp  - pointer to the key; if it compares equal to zero the key
#           load is skipped and only the hash value is cleared.
#
# The key is loaded byte-reversed (lrvg) and ANDed with the masks
# 0x0ffffffc0fffffff / 0x0ffffffc0ffffffc that are built in %r0/%r1.
# The routine always returns 0 in %r2.
$code.=<<___;
.text

.globl	poly1305_init
.type	poly1305_init,\@function
.align	16
poly1305_init:
	lghi	%r0,0
	lghi	%r1,-1
	stg	%r0,0($ctx)		# zero hash value
	stg	%r0,8($ctx)
	stg	%r0,16($ctx)

	cl${g}r	$inp,%r0
	je	.Lno_key

	lrvg	%r4,0($inp)		# load little-endian key
	lrvg	%r5,8($inp)

	nihl	%r1,0xffc0		# 0xffffffc0ffffffff
	srlg	%r0,%r1,4		# 0x0ffffffc0fffffff
	srlg	%r1,%r1,4
	nill	%r1,0xfffc		# 0x0ffffffc0ffffffc

	ngr	%r4,%r0
	ngr	%r5,%r1

	stg	%r4,32($ctx)
	stg	%r5,40($ctx)

.Lno_key:
	lghi	%r2,0
	br	%r14
.size	poly1305_init,.-poly1305_init
___
{
# poly1305_blocks: emitted assembly that absorbs $len/16 16-byte blocks
# from $inp into the hash value stored at 0..23($ctx), using the key
# stored at 32..47($ctx).  $padbit is added into the top limb for every
# block.  Nothing is done when $len>>4 is zero.
#
# Register allocation for the loop: %r6..%r14 hold the product halves
# and the three hash limbs, so %r6-%r14 are saved on entry and restored
# on exit.
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14));
# $r0/$r1 are the two key words; $s1 = r1 + (r1>>2).  $s1 lives in %r2,
# the same register as $ctx, which is why $ctx is spilled to the stack
# before $s1 is computed and reloaded after the loop.
my ($r0,$r1,$s1) = map("%r$_",(0..2));

# Note: "srl${g} $len,4" is written in two-operand form; for the 64-bit
# flavour the srlg fix-up substitution at the end of this script expands
# it to the three-operand srlg.
$code.=<<___;
.globl	poly1305_blocks
.type	poly1305_blocks,\@function
.align	16
poly1305_blocks:
	srl${g}	$len,4			# fixed-up in 64-bit build
	lghi	%r0,0
	cl${g}r	$len,%r0
	je	.Lno_data

	stm${g}	%r6,%r14,`6*$SIZE_T`($sp)

	llgfr   $padbit,$padbit		# clear upper half, much needed with
					# non-64-bit ABI
	lg	$r0,32($ctx)		# load key
	lg	$r1,40($ctx)

	lg	$h0,0($ctx)		# load hash value
	lg	$h1,8($ctx)
	lg	$h2,16($ctx)

	st$g	$ctx,`2*$SIZE_T`($sp)	# off-load $ctx
	srlg	$s1,$r1,2
	algr	$s1,$r1			# s1 = r1 + r1>>2
	j	.Loop

.align	16
.Loop:
	lrvg	$d0lo,0($inp)		# load little-endian input
	lrvg	$d1lo,8($inp)
	la	$inp,16($inp)

	algr	$d0lo,$h0		# accumulate input
	alcgr	$d1lo,$h1

	lgr	$h0,$d0lo
	mlgr	$d0hi,$r0		# h0*r0	  -> $d0hi:$d0lo
	lgr	$h1,$d1lo
	mlgr	$d1hi,$s1		# h1*5*r1 -> $d1hi:$d1lo

	mlgr	$t0,$r1			# h0*r1   -> $t0:$h0
	mlgr	$t1,$r0			# h1*r0   -> $t1:$h1
	alcgr	$h2,$padbit

	algr	$d0lo,$d1lo
	lgr	$d1lo,$h2
	alcgr	$d0hi,$d1hi
	lghi	$d1hi,0

	algr	$h1,$h0
	alcgr	$t1,$t0

	msgr	$d1lo,$s1		# h2*s1
	msgr	$h2,$r0			# h2*r0

	algr	$h1,$d1lo
	alcgr	$t1,$d1hi		# $d1hi is zero

	algr	$h1,$d0hi
	alcgr	$h2,$t1

	lghi	$h0,-4			# final reduction step
	ngr	$h0,$h2
	srlg	$t0,$h2,2
	algr	$h0,$t0
	lghi	$t1,3
	ngr	$h2,$t1

	algr	$h0,$d0lo
	alcgr	$h1,$d1hi		# $d1hi is still zero
	alcgr	$h2,$d1hi		# $d1hi is still zero

	brct$g	$len,.Loop

	l$g	$ctx,`2*$SIZE_T`($sp)	# restore $ctx

	stg	$h0,0($ctx)		# store hash value
	stg	$h1,8($ctx)
	stg	$h2,16($ctx)

	lm${g}	%r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
	br	%r14
.size	poly1305_blocks,.-poly1305_blocks
___
}
{
# poly1305_emit: emitted assembly that finalises the hash.
#
#   $ctx   - context holding the three hash limbs at 0, 8 and 16.
#   $mac   - output buffer (same register as $inp); receives 16 bytes,
#            stored byte-reversed (strvg).
#   $nonce - pointer to the nonce (same register as $len); two 64-bit
#            words are loaded and their 32-bit halves swapped (rllg 32)
#            before being added to the result.
#
# The code adds 5 to the hash, derives an all-ones/all-zeros mask from
# bit 2 and up of the top limb, and uses it to select between the
# original (h0,h1) copy kept in $d0/$d1 and the sum, without branching.
my ($mac,$nonce)=($inp,$len);
# $h0 is %r5; $h1,$h2,$d0,$d1 are %r6..%r9, which are the registers
# saved on entry and restored on exit.  $ctx is reused as scratch for
# the second nonce word once the hash limbs have been loaded.
my ($h0,$h1,$h2,$d0,$d1)=map("%r$_",(5..9));

$code.=<<___;
.globl	poly1305_emit
.type	poly1305_emit,\@function
.align	16
poly1305_emit:
	stm${g}	%r6,%r9,`6*$SIZE_T`($sp)

	lg	$h0,0($ctx)
	lg	$h1,8($ctx)
	lg	$h2,16($ctx)

	lghi	%r0,5
	lghi	%r1,0
	lgr	$d0,$h0
	lgr	$d1,$h1

	algr	$h0,%r0			# compare to modulus
	alcgr	$h1,%r1
	alcgr	$h2,%r1

	srlg	$h2,$h2,2		# did it borrow/carry?
	slgr	%r1,$h2			# 0-$h2>>2
	lg	$h2,0($nonce)		# load nonce
	lghi	%r0,-1
	lg	$ctx,8($nonce)
	xgr	%r0,%r1			# ~%r1

	ngr	$h0,%r1
	ngr	$d0,%r0
	ngr	$h1,%r1
	ngr	$d1,%r0
	ogr	$h0,$d0
	rllg	$d0,$h2,32		# flip nonce words
	ogr	$h1,$d1
	rllg	$d1,$ctx,32

	algr	$h0,$d0			# accumulate nonce
	alcgr	$h1,$d1

	strvg	$h0,0($mac)		# write little-endian result
	strvg	$h1,8($mac)

	lm${g}	%r6,%r9,`6*$SIZE_T`($sp)
	br	%r14
.size	poly1305_emit,.-poly1305_emit

.string	"Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}

# Replace every `...` span in the generated text with the result of
# evaluating it as Perl (used above for the `N*$SIZE_T` stack offsets).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Expand the two-operand shorthand "srlg %rX,N" into the three-operand
# form "srlg %rX,%rX,N"; srlg lines that already name two registers do
# not match because a register, not a digit, follows the first comma.
$code =~ s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm;

print $code;
# Buffered write errors only surface at close; die so the caller never
# silently ends up with a truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";