xref: /linux/lib/crypto/powerpc/ghashp8-ppc.pl (revision c4dde411bc366f568dbe33366253bbfea049e8ea)
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always a virtualized setup with a possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.

# ----------------------------------------------------------------------
# Generator setup.
#
# Command line:  <this script> <flavour> <output>
#
#   <flavour>  target name.  It must contain "64" or "32" (anything else
#              is fatal); a name ending in "le" selects the little-endian
#              variant when the template is emitted further down.
#   <output>   handed through verbatim to ppc-xlate.pl as its second
#              argument.
#
# Everything this script prints afterwards goes through ppc-xlate.pl,
# because STDOUT is re-opened as a pipe into it.
# ----------------------------------------------------------------------
$flavour=shift;
$output =shift;

# Word-size dependent parameters.  None of them is referenced by the
# assembly template in this file; they are retained unchanged.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

# Directory part of $0, including the trailing separator.  Empty when $0
# has no directory component, so the lookup below then starts from the
# current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for ppc-xlate.pl next to this script first, then in the two
# fallback locations; the first regular file found wins.
undef $xlate;
foreach my $candidate ("${dir}ppc-xlate.pl",
		       "${dir}../../perlasm/ppc-xlate.pl",
		       "${dir}../../../arch/powerpc/crypto/ppc-xlate.pl") {
	if (-f $candidate) { $xlate=$candidate; last; }
}
defined $xlate or die "can't locate ppc-xlate.pl";

# Low-precedence "or" is required here: with "||" the die would attach to
# the (always true) command string and a failed open would go unnoticed.
# NOTE(review): the single-string (shell) form of the pipe open is kept on
# purpose; a list-form open would pass <output> as a separate, possibly
# empty, argument -- confirm how ppc-xlate.pl treats that before changing.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

# Vector register assignments interpolated into the assembly template.
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my $vrsave="r12";	# GPR that holds the saved value of SPR 256

# ----------------------------------------------------------------------
# Assembly template.
#
# The heredoc interpolates the register-name variables ($Xip, $Htbl,
# $inp, $len, $Xl ... $lemask, $vrsave), so the text stored in $code uses
# concrete register names.  Instructions prefixed with "le?" are
# endian-specific; the prefix is resolved when the template is emitted.
#
# Three routines are defined.  Each saves SPR 256 in $vrsave on entry and
# restores it before returning.
#
#   gcm_init_p8   loads H through r4, derives the "twisted" H and stores
#                 a four-entry table through r3 at offsets 0x00/0x10/
#                 0x20/0x30: the 0xc2... reduction constant, Hl, H, Hh.
#   gcm_gmult_p8  loads Xi through $Xip and the table through $Htbl,
#                 forms the three vpmsumd products, folds them with a
#                 two-phase reduction against the constant, and writes
#                 Xi back.
#   gcm_ghash_p8  same multiply/reduce step, run in a loop that also
#                 reads 16-byte blocks through $inp while stepping $len
#                 down by 16; Xi is written back once at the end.
#
# The template text must stay byte-for-byte stable: it is the program's
# output.
# ----------------------------------------------------------------------
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p8
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p8,.-.gcm_init_p8

.globl	.gcm_gmult_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p8,.-.gcm_gmult_p8

.globl	.gcm_ghash_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	 subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	 subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	 and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	 add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	 lvx_u		$IN,0,$inp
	 addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p8,.-.gcm_ghash_p8

.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# ----------------------------------------------------------------------
# Emit the assembly template line by line, resolving the endian markers.
#
# A flavour ending in "le" means little-endian.  For the selected
# endianness the marker ("le?" or "be?") is deleted, leaving the bare
# instruction; for the other endianness it is rewritten to "#le#" or
# "#be#" (presumably so the line is treated as a comment downstream --
# confirm against ppc-xlate.pl).  Only the first marker on a line is
# rewritten, and "le?" takes priority over "be?".
# ----------------------------------------------------------------------

# The flavour does not change while emitting, so pick both replacement
# strings once instead of re-testing the flavour for every line.
my ($le_subst,$be_subst) = ($flavour =~ /le$/) ? ("","#be#") : ("#le#","");

foreach my $line (split("\n",$code)) {
	$line =~ s/le\?/$le_subst/	or
	$line =~ s/be\?/$be_subst/;
	print $line,"\n";
}

# STDOUT is the pipe into ppc-xlate.pl.  Closing it flushes the buffered
# output and waits for the filter, so this is where a write error or a
# failing filter becomes visible; report it instead of exiting 0 with
# truncated output.  For a piped handle close() leaves $! at 0 when the
# only problem was a non-zero exit, in which case $? has the wait status.
close STDOUT
    or die($! ? "error closing STDOUT: $!"
	      : "output filter exited with wait status $?");
245