xref: /linux/arch/powerpc/crypto/aesp8-ppc.pl (revision 0526b56cbc3c489642bd6a5fe4b718dea7ef0ee8)
1#! /usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3
4# This code is taken from CRYPTOGAMs[1] and is included here using the option
5# in the license to distribute the code under the GPL. Therefore this program
6# is free software; you can redistribute it and/or modify it under the terms of
7# the GNU General Public License version 2 as published by the Free Software
8# Foundation.
9#
10# [1] https://www.openssl.org/~appro/cryptogams/
11
12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13# All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18#
19#       * Redistributions of source code must retain copyright notices,
20#         this list of conditions and the following disclaimer.
21#
22#       * Redistributions in binary form must reproduce the above
23#         copyright notice, this list of conditions and the following
24#         disclaimer in the documentation and/or other materials
25#         provided with the distribution.
26#
27#       * Neither the name of the CRYPTOGAMS nor the names of its
28#         copyright holder and contributors may be used to endorse or
29#         promote products derived from this software without specific
30#         prior written permission.
31#
32# ALTERNATIVELY, provided that this notice is retained in full, this
33# product may be distributed under the terms of the GNU General Public
34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35# those given above.
36#
37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49# ====================================================================
50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51# project. The module is, however, dual licensed under OpenSSL and
52# CRYPTOGAMS licenses depending on where you obtain it. For further
53# details see https://www.openssl.org/~appro/cryptogams/.
54# ====================================================================
55#
56# This module implements support for AES instructions as per PowerISA
57# specification version 2.07, first implemented by POWER8 processor.
58# The module is endian-agnostic in sense that it supports both big-
59# and little-endian cases. Data alignment in parallelizable modes is
60# handled with VSX loads and stores, which implies MSR.VSX flag being
61# set. It should also be noted that ISA specification doesn't prohibit
62# alignment exceptions for these instructions on page boundaries.
63# Initially alignment was handled in pure AltiVec/VMX way [when data
64# is aligned programmatically, which in turn guarantees exception-
65# free execution], but it turned to hamper performance when vcipher
66# instructions are interleaved. It's reckoned that eventual
67# misalignment penalties at page boundaries are in average lower
68# than additional overhead in pure AltiVec approach.
69#
70# May 2016
71#
72# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73# systems were measured.
74#
75######################################################################
76# Current large-block performance in cycles per byte processed with
77# 128-bit key (less is better).
78#
79#		CBC en-/decrypt	CTR	XTS
80# POWER8[le]	3.96/0.72	0.74	1.1
81# POWER8[be]	3.75/0.65	0.66	1.0
82
# Parse the target "flavour" (e.g. linux32, linux64le) from the command
# line and derive the ABI-dependent parameters used throughout the
# generated assembly:
#   $SIZE_T - pointer size in bytes
#   $LRSAVE - link-register save offset within the caller's frame
#   $STU/$POP/$PUSH - pointer-sized stack-update / load / store mnemonics
#   $UCMP   - unsigned compare; $SHL - shift-left-immediate
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero (and usable as a byte offset) when targeting little-endian.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl translator next to this script or in the
# OpenSSL perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all output through the translator.  Low-precedence "or" is
# required here: with "||" the check binds to the (always-truthy)
# concatenated argument string, so a failure of open() itself would
# never be detected.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# minimal stack frame
$prefix="aes_p8";	# symbol prefix for all exported routines

$sp="r1";
$vrsave="r12";
117
#########################################################################
{{{	# Key setup procedures						#
# Register aliases used by the key-schedule routines below:
#   r3..r8 -> $inp,$bits,$out,$ptr,$cnt,$rounds (r3..r5 are the C args)
#   v0..v6 -> $zero,$in0,$in1,$key,$rcon,$mask,$tmp
#   v7..v11 -> $stage,$outperm,$outmask,$outhead,$outtail
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# NOTE(review): the heredoc below is emitted essentially verbatim.  The
# "?"-prefixed mnemonics and the ?rev/?asis tags on the constant table
# are rewritten by the endian-specific post-processing loop at the
# bottom of this file — do not edit them without considering both
# endiannesses.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
454#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) appends one single-block AES routine to $code:
#   gen_block("en") -> ${prefix}_encrypt  (vcipher/vcipherlast)
#   gen_block("de") -> ${prefix}_decrypt  (vncipher/vncipherlast)
# The emitted routines tolerate arbitrarily aligned input/output and
# both endiannesses (the "?"/"le?" markers are resolved by the
# post-processing loop at the bottom of this file).
#
# The empty prototype previously on this sub was never honoured anyway
# (the call sites used &gen_block() expressly to bypass it), so it is
# dropped and the sub is called normally with its argument.
sub gen_block {
my $dir = shift;			# "en" or "de"
my $n   = $dir eq "de" ? "n" : "";	# mnemonic infix: vcipher vs vncipher
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
532
# Post-process the accumulated assembly line by line: expand `...`
# escapes via eval, byte-serialize the endian-tagged constant table at
# the top of $code, and rewrite the "?"-prefixed endian-sensitive
# mnemonics for the target flavour before printing to the xlate pipe.
my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

        print $_,"\n";
}

# STDOUT is a pipe to the $xlate child; buffered-write errors and a
# failing child only surface at close time, so the result must be
# checked or a truncated/empty assembly file goes unnoticed.
close STDOUT or die "error closing STDOUT: $!";
586