xref: /linux/arch/powerpc/crypto/ghashp8-ppc.pl (revision 71dfa617ea9f18e4585fe78364217cd32b1fc382)
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because the
# setup is always virtualized, possibly with a throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.

# Command line: <flavour> [<output>].  Both values are passed on to
# ppc-xlate.pl when the output pipe is opened further down; the flavour
# additionally selects the ABI parameters below and, when it ends in "le",
# the little-endian variant of the code template.
$flavour=shift;
$output =shift;

# Word-size dependent ABI parameters: pointer size, link-register save
# offset and the store-with-update / load / store mnemonics.  A flavour
# naming neither "64" nor "32" is rejected outright.  None of these
# variables is referenced by the code template visible in this file.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

# Locate ppc-xlate.pl relative to this script: first in the script's own
# directory, then in ../../perlasm/ below it.  $dir is the directory part
# of $0 including the trailing separator, or empty when $0 has none (the
# explicit fallback avoids picking up a stale $1 when the match fails).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator.
# Low-precedence "or" is required: with "||" the alternative binds to the
# (always true) command string and a failed open would go unnoticed.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Symbolic register names interpolated into the code template below.
#
# General-purpose registers r3..r6 carry the arguments, in order: pointer
# to Xi, pointer to the pre-computed H table, input pointer, byte count.
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

# Vector registers v0..v3: low/middle/high parts of the product and the
# current input block.  v4..v12: zero, three temporaries, the 0xc2
# reduction constant, H and its two halves, and the little-endian
# byte-swap permutation mask.
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my $vrsave="r12";	# holds the caller's SPR 256 value while a routine runs

# Assembly template.  It is post-processed line by line at the end of this
# script (resolving the "le?" / "be?" endian tags) and piped through
# ppc-xlate.pl.  Three entry points are defined:
#
#   .gcm_init_p8   loads H through r4, derives the "twisted" H and stores
#                  a four-entry table (0xc2 constant, H.lo, H, H.hi) at
#                  offsets 0x00/0x10/0x20/0x30 from r3.
#   .gcm_gmult_p8  loads Xi through r3 and the table through r4, performs
#                  one multiplication followed by the two-phase reduction
#                  and writes Xi back.
#   .gcm_ghash_p8  same arithmetic, but each pass first xors in a 16-byte
#                  block read through r5; the loop repeats until
#                  subtracting 16 from the count in r6 borrows.
#
# Every routine saves SPR 256 in r12, installs its own value from r0 and
# restores the saved value before returning.
#
# NOTE(review): in the ghash loop the "and"/"add" on r0 appears to step
# the input pointer back on the final pass, so that the look-ahead load
# re-reads the last block instead of running past the input -- confirm.
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p8
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p8,.-.gcm_init_p8

.globl	.gcm_gmult_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p8,.-.gcm_gmult_p8

.globl	.gcm_ghash_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	 subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	 subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	 and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	 add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	 lvx_u		$IN,0,$inp
	 addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p8,.-.gcm_ghash_p8

.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# Emit the template one line at a time, resolving the endian tags: for a
# flavour ending in "le" the first "le?" tag on a line is dropped (the
# instruction becomes active), otherwise the first "be?" tag is rewritten
# to "#be#"; for every other flavour the roles are swapped.  At most one
# substitution is applied per line.
my $little_endian = ($flavour =~ /le$/);
foreach my $line (split("\n",$code)) {
	if ($little_endian) {
		$line =~ s/le\?//	or
		$line =~ s/be\?/#be#/;
	} else {
		$line =~ s/le\?/#le#/	or
		$line =~ s/be\?//;
	}
	print $line,"\n";
}

# STDOUT is a pipe into ppc-xlate.pl: closing it flushes the remaining
# output and waits for the child, so a failed write or a non-zero exit of
# the translator is reported here instead of being silently lost.
close STDOUT or die "error closing STDOUT: $!";