xref: /freebsd/crypto/openssl/crypto/aes/asm/aesp8-ppc.pl (revision a7148ab39c03abd4d1a84997c70bf96f15dd2a09)
17bded2dbSJung-uk Kim#! /usr/bin/env perl
2*a7148ab3SEnji Cooper# Copyright 2014-2024 The OpenSSL Project Authors. All Rights Reserved.
3e71b7053SJung-uk Kim#
4b077aed3SPierre Pronchery# Licensed under the Apache License 2.0 (the "License").  You may not use
5e71b7053SJung-uk Kim# this file except in compliance with the License.  You can obtain a copy
6e71b7053SJung-uk Kim# in the file LICENSE in the source distribution or at
7e71b7053SJung-uk Kim# https://www.openssl.org/source/license.html
8e71b7053SJung-uk Kim
97bded2dbSJung-uk Kim#
107bded2dbSJung-uk Kim# ====================================================================
117bded2dbSJung-uk Kim# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
127bded2dbSJung-uk Kim# project. The module is, however, dual licensed under OpenSSL and
137bded2dbSJung-uk Kim# CRYPTOGAMS licenses depending on where you obtain it. For further
147bded2dbSJung-uk Kim# details see http://www.openssl.org/~appro/cryptogams/.
157bded2dbSJung-uk Kim# ====================================================================
167bded2dbSJung-uk Kim#
177bded2dbSJung-uk Kim# This module implements support for AES instructions as per PowerISA
187bded2dbSJung-uk Kim# specification version 2.07, first implemented by POWER8 processor.
197bded2dbSJung-uk Kim# The module is endian-agnostic in the sense that it supports both big-
207bded2dbSJung-uk Kim# and little-endian cases. Data alignment in parallelizable modes is
217bded2dbSJung-uk Kim# handled with VSX loads and stores, which implies MSR.VSX flag being
227bded2dbSJung-uk Kim# set. It should also be noted that ISA specification doesn't prohibit
237bded2dbSJung-uk Kim# alignment exceptions for these instructions on page boundaries.
247bded2dbSJung-uk Kim# Initially alignment was handled in pure AltiVec/VMX way [when data
257bded2dbSJung-uk Kim# is aligned programmatically, which in turn guarantees exception-
267bded2dbSJung-uk Kim# free execution], but it turned out to hamper performance when vcipher
277bded2dbSJung-uk Kim# instructions are interleaved. It's reckoned that eventual
287bded2dbSJung-uk Kim# misalignment penalties at page boundaries are on average lower
297bded2dbSJung-uk Kim# than additional overhead in pure AltiVec approach.
30e71b7053SJung-uk Kim#
31e71b7053SJung-uk Kim# May 2016
32e71b7053SJung-uk Kim#
33e71b7053SJung-uk Kim# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
34e71b7053SJung-uk Kim# systems were measured.
35e71b7053SJung-uk Kim#
36e71b7053SJung-uk Kim######################################################################
37e71b7053SJung-uk Kim# Current large-block performance in cycles per byte processed with
38e71b7053SJung-uk Kim# 128-bit key (less is better).
39e71b7053SJung-uk Kim#
40e71b7053SJung-uk Kim#		CBC en-/decrypt	CTR	XTS
41e71b7053SJung-uk Kim# POWER8[le]	3.96/0.72	0.74	1.1
42e71b7053SJung-uk Kim# POWER8[be]	3.75/0.65	0.66	1.0
43e71b7053SJung-uk Kim# POWER9[le]	4.02/0.86	0.84	1.05
44e71b7053SJung-uk Kim# POWER9[be]	3.99/0.78	0.79	0.97
457bded2dbSJung-uk Kim
46b077aed3SPierre Pronchery# $output is the last argument if it looks like a file (it has an extension)
47b077aed3SPierre Pronchery# $flavour is the first argument if it doesn't look like a file
# Both statements consume from @ARGV: pop/shift with no operand act on
# @ARGV at file scope.  Either variable is left undef when its test fails.
# $output is taken first, so a lone argument with an extension becomes
# $output and leaves nothing for $flavour.
48b077aed3SPierre Pronchery$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
49b077aed3SPierre Pronchery$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
507bded2dbSJung-uk Kim
# Pick the pointer width and the matching instruction mnemonics from the
# flavour string.  A flavour containing 64 selects 8-byte words, one
# containing 32 selects 4-byte words; anything else aborts the script.
#   $SIZE_T - pointer size in bytes
#   $LRSAVE - offset of the link-register save slot from the stack pointer
#   $STU    - store-with-update mnemonic used to allocate a stack frame
#   $POP    - pointer-sized load mnemonic
#   $PUSH   - pointer-sized store mnemonic
#   $UCMP   - unsigned pointer-sized compare mnemonic
#   $SHL    - shift-left-immediate mnemonic
517bded2dbSJung-uk Kimif ($flavour =~ /64/) {
527bded2dbSJung-uk Kim	$SIZE_T	=8;
537bded2dbSJung-uk Kim	$LRSAVE	=2*$SIZE_T;
547bded2dbSJung-uk Kim	$STU	="stdu";
557bded2dbSJung-uk Kim	$POP	="ld";
567bded2dbSJung-uk Kim	$PUSH	="std";
577bded2dbSJung-uk Kim	$UCMP	="cmpld";
587bded2dbSJung-uk Kim	$SHL	="sldi";
597bded2dbSJung-uk Kim} elsif ($flavour =~ /32/) {
607bded2dbSJung-uk Kim	$SIZE_T	=4;
617bded2dbSJung-uk Kim	$LRSAVE	=$SIZE_T;
627bded2dbSJung-uk Kim	$STU	="stwu";
637bded2dbSJung-uk Kim	$POP	="lwz";
647bded2dbSJung-uk Kim	$PUSH	="stw";
657bded2dbSJung-uk Kim	$UCMP	="cmplw";
667bded2dbSJung-uk Kim	$SHL	="slwi";
677bded2dbSJung-uk Kim} else { die "nonsense $flavour"; }
687bded2dbSJung-uk Kim
# $LITTLE_ENDIAN is $SIZE_T when the flavour string ends in "le", else 0.
697bded2dbSJung-uk Kim$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
707bded2dbSJung-uk Kim
# Capture the directory part of this script's path (including the trailing
# separator) in $dir, then look for ppc-xlate.pl first next to the script
# and then under ../../perlasm/ relative to it.
717bded2dbSJung-uk Kim$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
727bded2dbSJung-uk Kim( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
737bded2dbSJung-uk Kim( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
747bded2dbSJung-uk Kimdie "can't locate ppc-xlate.pl";
757bded2dbSJung-uk Kim
# Reopen STDOUT as a pipe into the translator, run with the current perl
# interpreter ($^X) and given the flavour and the quoted output name.
# Everything this script later prints is therefore fed to ppc-xlate.pl.
76b077aed3SPierre Proncheryopen STDOUT,"| $^X $xlate $flavour \"$output\""
77b077aed3SPierre Pronchery    or die "can't call $xlate: $!";
787bded2dbSJung-uk Kim
# $FRAME is the stack frame size (eight pointer-sized words) used when the
# generated code allocates a frame with $STU; $prefix is prepended to every
# exported symbol name.
797bded2dbSJung-uk Kim$FRAME=8*$SIZE_T;
807bded2dbSJung-uk Kim$prefix="aes_p8";
817bded2dbSJung-uk Kim
# Fixed register names: r1 is the stack pointer; r12 holds the value of
# SPR 256 read with mfspr on entry so it can be written back with mtspr
# before a routine returns.
827bded2dbSJung-uk Kim$sp="r1";
837bded2dbSJung-uk Kim$vrsave="r12";
847bded2dbSJung-uk Kim
857bded2dbSJung-uk Kim#########################################################################
867bded2dbSJung-uk Kim{{{	# Key setup procedures						#
# Appends three pieces of assembly to $code:
#   Lconsts
#       Leaves the address of the rcon table in $ptr (r6); r0 preserves
#       the link register around the bcl used to obtain the current address.
#   .${prefix}_set_encrypt_key
#       r3 ($inp) = user key, r4 ($bits) = key length in bits,
#       r5 ($out) = key schedule to fill.
#       Returns in r3: -1 if $inp or $out is zero; -2 if $bits is below
#       128, above 256, or has any of its low six bits set; 0 on success.
#       $bits is compared with 192 to choose Loop128, L192 or L256, and
#       the round count (10, 12 or 14) is stored with stw through $out
#       at Ldone.
#   .${prefix}_set_decrypt_key
#       Calls Lset_encrypt_key and, if it returned 0, runs Ldeckey, which
#       swaps 16-byte round keys pairwise from both ends of the schedule
#       (rounds/2 iterations).  Returns 0, or the error from the call.
# The schedule pointer may be unaligned: each round key is rotated with
# $outperm and merged with the previous tail through $outmask before the
# stvx, and Ldone merges the final tail back into memory.
877bded2dbSJung-uk Kimmy ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
887bded2dbSJung-uk Kimmy ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
897bded2dbSJung-uk Kimmy ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
907bded2dbSJung-uk Kim
917bded2dbSJung-uk Kim$code.=<<___;
927bded2dbSJung-uk Kim.machine	"any"
937bded2dbSJung-uk Kim
947bded2dbSJung-uk Kim.text
957bded2dbSJung-uk Kim
967bded2dbSJung-uk Kim.align	7
977bded2dbSJung-uk Kimrcon:
987bded2dbSJung-uk Kim.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
997bded2dbSJung-uk Kim.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
1007bded2dbSJung-uk Kim.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
1017bded2dbSJung-uk Kim.long	0,0,0,0						?asis
102*a7148ab3SEnji Cooper.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
1037bded2dbSJung-uk KimLconsts:
1047bded2dbSJung-uk Kim	mflr	r0
1057bded2dbSJung-uk Kim	bcl	20,31,\$+4
1067bded2dbSJung-uk Kim	mflr	$ptr	 #vvvvv "distance between . and rcon
107*a7148ab3SEnji Cooper	addi	$ptr,$ptr,-0x58
1087bded2dbSJung-uk Kim	mtlr	r0
1097bded2dbSJung-uk Kim	blr
1107bded2dbSJung-uk Kim	.long	0
1117bded2dbSJung-uk Kim	.byte	0,12,0x14,0,0,0,0,0
1127bded2dbSJung-uk Kim.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
1137bded2dbSJung-uk Kim
1147bded2dbSJung-uk Kim.globl	.${prefix}_set_encrypt_key
1157bded2dbSJung-uk Kim.align	5
1167bded2dbSJung-uk Kim.${prefix}_set_encrypt_key:
1177bded2dbSJung-uk KimLset_encrypt_key:
1187bded2dbSJung-uk Kim	mflr		r11
1197bded2dbSJung-uk Kim	$PUSH		r11,$LRSAVE($sp)
1207bded2dbSJung-uk Kim
1217bded2dbSJung-uk Kim	li		$ptr,-1
1227bded2dbSJung-uk Kim	${UCMP}i	$inp,0
1237bded2dbSJung-uk Kim	beq-		Lenc_key_abort		# if ($inp==0) return -1;
1247bded2dbSJung-uk Kim	${UCMP}i	$out,0
1257bded2dbSJung-uk Kim	beq-		Lenc_key_abort		# if ($out==0) return -1;
1267bded2dbSJung-uk Kim	li		$ptr,-2
1277bded2dbSJung-uk Kim	cmpwi		$bits,128
1287bded2dbSJung-uk Kim	blt-		Lenc_key_abort
1297bded2dbSJung-uk Kim	cmpwi		$bits,256
1307bded2dbSJung-uk Kim	bgt-		Lenc_key_abort
1317bded2dbSJung-uk Kim	andi.		r0,$bits,0x3f
1327bded2dbSJung-uk Kim	bne-		Lenc_key_abort
1337bded2dbSJung-uk Kim
1347bded2dbSJung-uk Kim	lis		r0,0xfff0
1357bded2dbSJung-uk Kim	mfspr		$vrsave,256
1367bded2dbSJung-uk Kim	mtspr		256,r0
1377bded2dbSJung-uk Kim
1387bded2dbSJung-uk Kim	bl		Lconsts
1397bded2dbSJung-uk Kim	mtlr		r11
1407bded2dbSJung-uk Kim
1417bded2dbSJung-uk Kim	neg		r9,$inp
1427bded2dbSJung-uk Kim	lvx		$in0,0,$inp
1437bded2dbSJung-uk Kim	addi		$inp,$inp,15		# 15 is not typo
1447bded2dbSJung-uk Kim	lvsr		$key,0,r9		# borrow $key
1457bded2dbSJung-uk Kim	li		r8,0x20
1467bded2dbSJung-uk Kim	cmpwi		$bits,192
1477bded2dbSJung-uk Kim	lvx		$in1,0,$inp
1487bded2dbSJung-uk Kim	le?vspltisb	$mask,0x0f		# borrow $mask
1497bded2dbSJung-uk Kim	lvx		$rcon,0,$ptr
1507bded2dbSJung-uk Kim	le?vxor		$key,$key,$mask		# adjust for byte swap
1517bded2dbSJung-uk Kim	lvx		$mask,r8,$ptr
1527bded2dbSJung-uk Kim	addi		$ptr,$ptr,0x10
1537bded2dbSJung-uk Kim	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
1547bded2dbSJung-uk Kim	li		$cnt,8
1557bded2dbSJung-uk Kim	vxor		$zero,$zero,$zero
1567bded2dbSJung-uk Kim	mtctr		$cnt
1577bded2dbSJung-uk Kim
1587bded2dbSJung-uk Kim	?lvsr		$outperm,0,$out
1597bded2dbSJung-uk Kim	vspltisb	$outmask,-1
1607bded2dbSJung-uk Kim	lvx		$outhead,0,$out
1617bded2dbSJung-uk Kim	?vperm		$outmask,$zero,$outmask,$outperm
1627bded2dbSJung-uk Kim
1637bded2dbSJung-uk Kim	blt		Loop128
1647bded2dbSJung-uk Kim	addi		$inp,$inp,8
1657bded2dbSJung-uk Kim	beq		L192
1667bded2dbSJung-uk Kim	addi		$inp,$inp,8
1677bded2dbSJung-uk Kim	b		L256
1687bded2dbSJung-uk Kim
1697bded2dbSJung-uk Kim.align	4
1707bded2dbSJung-uk KimLoop128:
1717bded2dbSJung-uk Kim	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
1727bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
1737bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
1747bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
1757bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
1767bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
1777bded2dbSJung-uk Kim	 stvx		$stage,0,$out
1787bded2dbSJung-uk Kim	 addi		$out,$out,16
1797bded2dbSJung-uk Kim
1807bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
1817bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
1827bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
1837bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
1847bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
1857bded2dbSJung-uk Kim	 vadduwm	$rcon,$rcon,$rcon
1867bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
1877bded2dbSJung-uk Kim	bdnz		Loop128
1887bded2dbSJung-uk Kim
1897bded2dbSJung-uk Kim	lvx		$rcon,0,$ptr		# last two round keys
1907bded2dbSJung-uk Kim
1917bded2dbSJung-uk Kim	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
1927bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
1937bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
1947bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
1957bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
1967bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
1977bded2dbSJung-uk Kim	 stvx		$stage,0,$out
1987bded2dbSJung-uk Kim	 addi		$out,$out,16
1997bded2dbSJung-uk Kim
2007bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2017bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2027bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2037bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2047bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2057bded2dbSJung-uk Kim	 vadduwm	$rcon,$rcon,$rcon
2067bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
2077bded2dbSJung-uk Kim
2087bded2dbSJung-uk Kim	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
2097bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
2107bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
2117bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2127bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
2137bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
2147bded2dbSJung-uk Kim	 stvx		$stage,0,$out
2157bded2dbSJung-uk Kim	 addi		$out,$out,16
2167bded2dbSJung-uk Kim
2177bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2187bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2197bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2207bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2217bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2227bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
2237bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
2247bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2257bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
2267bded2dbSJung-uk Kim	 stvx		$stage,0,$out
2277bded2dbSJung-uk Kim
2287bded2dbSJung-uk Kim	addi		$inp,$out,15		# 15 is not typo
2297bded2dbSJung-uk Kim	addi		$out,$out,0x50
2307bded2dbSJung-uk Kim
2317bded2dbSJung-uk Kim	li		$rounds,10
2327bded2dbSJung-uk Kim	b		Ldone
2337bded2dbSJung-uk Kim
2347bded2dbSJung-uk Kim.align	4
2357bded2dbSJung-uk KimL192:
2367bded2dbSJung-uk Kim	lvx		$tmp,0,$inp
2377bded2dbSJung-uk Kim	li		$cnt,4
2387bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
2397bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2407bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
2417bded2dbSJung-uk Kim	 stvx		$stage,0,$out
2427bded2dbSJung-uk Kim	 addi		$out,$out,16
2437bded2dbSJung-uk Kim	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
2447bded2dbSJung-uk Kim	vspltisb	$key,8			# borrow $key
2457bded2dbSJung-uk Kim	mtctr		$cnt
2467bded2dbSJung-uk Kim	vsububm		$mask,$mask,$key	# adjust the mask
2477bded2dbSJung-uk Kim
2487bded2dbSJung-uk KimLoop192:
2497bded2dbSJung-uk Kim	vperm		$key,$in1,$in1,$mask	# roate-n-splat
2507bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
2517bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
2527bded2dbSJung-uk Kim
2537bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2547bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2557bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2567bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2577bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2587bded2dbSJung-uk Kim
2597bded2dbSJung-uk Kim	 vsldoi		$stage,$zero,$in1,8
2607bded2dbSJung-uk Kim	vspltw		$tmp,$in0,3
2617bded2dbSJung-uk Kim	vxor		$tmp,$tmp,$in1
2627bded2dbSJung-uk Kim	vsldoi		$in1,$zero,$in1,12	# >>32
2637bded2dbSJung-uk Kim	 vadduwm	$rcon,$rcon,$rcon
2647bded2dbSJung-uk Kim	vxor		$in1,$in1,$tmp
2657bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
2667bded2dbSJung-uk Kim	vxor		$in1,$in1,$key
2677bded2dbSJung-uk Kim	 vsldoi		$stage,$stage,$in0,8
2687bded2dbSJung-uk Kim
2697bded2dbSJung-uk Kim	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
2707bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
2717bded2dbSJung-uk Kim	 vperm		$outtail,$stage,$stage,$outperm	# rotate
2727bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2737bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
2747bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
2757bded2dbSJung-uk Kim	 stvx		$stage,0,$out
2767bded2dbSJung-uk Kim	 addi		$out,$out,16
2777bded2dbSJung-uk Kim
2787bded2dbSJung-uk Kim	 vsldoi		$stage,$in0,$in1,8
2797bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2807bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2817bded2dbSJung-uk Kim	 vperm		$outtail,$stage,$stage,$outperm	# rotate
2827bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2837bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
2847bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2857bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
2867bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
2877bded2dbSJung-uk Kim	 stvx		$stage,0,$out
2887bded2dbSJung-uk Kim	 addi		$out,$out,16
2897bded2dbSJung-uk Kim
2907bded2dbSJung-uk Kim	vspltw		$tmp,$in0,3
2917bded2dbSJung-uk Kim	vxor		$tmp,$tmp,$in1
2927bded2dbSJung-uk Kim	vsldoi		$in1,$zero,$in1,12	# >>32
2937bded2dbSJung-uk Kim	 vadduwm	$rcon,$rcon,$rcon
2947bded2dbSJung-uk Kim	vxor		$in1,$in1,$tmp
2957bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
2967bded2dbSJung-uk Kim	vxor		$in1,$in1,$key
2977bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
2987bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
2997bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
3007bded2dbSJung-uk Kim	 stvx		$stage,0,$out
3017bded2dbSJung-uk Kim	 addi		$inp,$out,15		# 15 is not typo
3027bded2dbSJung-uk Kim	 addi		$out,$out,16
3037bded2dbSJung-uk Kim	bdnz		Loop192
3047bded2dbSJung-uk Kim
3057bded2dbSJung-uk Kim	li		$rounds,12
3067bded2dbSJung-uk Kim	addi		$out,$out,0x20
3077bded2dbSJung-uk Kim	b		Ldone
3087bded2dbSJung-uk Kim
3097bded2dbSJung-uk Kim.align	4
3107bded2dbSJung-uk KimL256:
3117bded2dbSJung-uk Kim	lvx		$tmp,0,$inp
3127bded2dbSJung-uk Kim	li		$cnt,7
3137bded2dbSJung-uk Kim	li		$rounds,14
3147bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
3157bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
3167bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
3177bded2dbSJung-uk Kim	 stvx		$stage,0,$out
3187bded2dbSJung-uk Kim	 addi		$out,$out,16
3197bded2dbSJung-uk Kim	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
3207bded2dbSJung-uk Kim	mtctr		$cnt
3217bded2dbSJung-uk Kim
3227bded2dbSJung-uk KimLoop256:
3237bded2dbSJung-uk Kim	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
3247bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in0,12	# >>32
3257bded2dbSJung-uk Kim	 vperm		$outtail,$in1,$in1,$outperm	# rotate
3267bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
3277bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
3287bded2dbSJung-uk Kim	vcipherlast	$key,$key,$rcon
3297bded2dbSJung-uk Kim	 stvx		$stage,0,$out
3307bded2dbSJung-uk Kim	 addi		$out,$out,16
3317bded2dbSJung-uk Kim
3327bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
3337bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
3347bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
3357bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
3367bded2dbSJung-uk Kim	vxor		$in0,$in0,$tmp
3377bded2dbSJung-uk Kim	 vadduwm	$rcon,$rcon,$rcon
3387bded2dbSJung-uk Kim	vxor		$in0,$in0,$key
3397bded2dbSJung-uk Kim	 vperm		$outtail,$in0,$in0,$outperm	# rotate
3407bded2dbSJung-uk Kim	 vsel		$stage,$outhead,$outtail,$outmask
3417bded2dbSJung-uk Kim	 vmr		$outhead,$outtail
3427bded2dbSJung-uk Kim	 stvx		$stage,0,$out
3437bded2dbSJung-uk Kim	 addi		$inp,$out,15		# 15 is not typo
3447bded2dbSJung-uk Kim	 addi		$out,$out,16
3457bded2dbSJung-uk Kim	bdz		Ldone
3467bded2dbSJung-uk Kim
3477bded2dbSJung-uk Kim	vspltw		$key,$in0,3		# just splat
3487bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$in1,12	# >>32
3497bded2dbSJung-uk Kim	vsbox		$key,$key
3507bded2dbSJung-uk Kim
3517bded2dbSJung-uk Kim	vxor		$in1,$in1,$tmp
3527bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
3537bded2dbSJung-uk Kim	vxor		$in1,$in1,$tmp
3547bded2dbSJung-uk Kim	vsldoi		$tmp,$zero,$tmp,12	# >>32
3557bded2dbSJung-uk Kim	vxor		$in1,$in1,$tmp
3567bded2dbSJung-uk Kim
3577bded2dbSJung-uk Kim	vxor		$in1,$in1,$key
3587bded2dbSJung-uk Kim	b		Loop256
3597bded2dbSJung-uk Kim
3607bded2dbSJung-uk Kim.align	4
3617bded2dbSJung-uk KimLdone:
3627bded2dbSJung-uk Kim	lvx		$in1,0,$inp		# redundant in aligned case
3637bded2dbSJung-uk Kim	vsel		$in1,$outhead,$in1,$outmask
3647bded2dbSJung-uk Kim	stvx		$in1,0,$inp
3657bded2dbSJung-uk Kim	li		$ptr,0
3667bded2dbSJung-uk Kim	mtspr		256,$vrsave
3677bded2dbSJung-uk Kim	stw		$rounds,0($out)
3687bded2dbSJung-uk Kim
3697bded2dbSJung-uk KimLenc_key_abort:
3707bded2dbSJung-uk Kim	mr		r3,$ptr
3717bded2dbSJung-uk Kim	blr
3727bded2dbSJung-uk Kim	.long		0
3737bded2dbSJung-uk Kim	.byte		0,12,0x14,1,0,0,3,0
3747bded2dbSJung-uk Kim	.long		0
3757bded2dbSJung-uk Kim.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
3767bded2dbSJung-uk Kim
3777bded2dbSJung-uk Kim.globl	.${prefix}_set_decrypt_key
3787bded2dbSJung-uk Kim.align	5
3797bded2dbSJung-uk Kim.${prefix}_set_decrypt_key:
3807bded2dbSJung-uk Kim	$STU		$sp,-$FRAME($sp)
3817bded2dbSJung-uk Kim	mflr		r10
3827bded2dbSJung-uk Kim	$PUSH		r10,$FRAME+$LRSAVE($sp)
3837bded2dbSJung-uk Kim	bl		Lset_encrypt_key
3847bded2dbSJung-uk Kim	mtlr		r10
3857bded2dbSJung-uk Kim
3867bded2dbSJung-uk Kim	cmpwi		r3,0
3877bded2dbSJung-uk Kim	bne-		Ldec_key_abort
3887bded2dbSJung-uk Kim
3897bded2dbSJung-uk Kim	slwi		$cnt,$rounds,4
3907bded2dbSJung-uk Kim	subi		$inp,$out,240		# first round key
3917bded2dbSJung-uk Kim	srwi		$rounds,$rounds,1
3927bded2dbSJung-uk Kim	add		$out,$inp,$cnt		# last round key
3937bded2dbSJung-uk Kim	mtctr		$rounds
3947bded2dbSJung-uk Kim
3957bded2dbSJung-uk KimLdeckey:
3967bded2dbSJung-uk Kim	lwz		r0, 0($inp)
3977bded2dbSJung-uk Kim	lwz		r6, 4($inp)
3987bded2dbSJung-uk Kim	lwz		r7, 8($inp)
3997bded2dbSJung-uk Kim	lwz		r8, 12($inp)
4007bded2dbSJung-uk Kim	addi		$inp,$inp,16
4017bded2dbSJung-uk Kim	lwz		r9, 0($out)
4027bded2dbSJung-uk Kim	lwz		r10,4($out)
4037bded2dbSJung-uk Kim	lwz		r11,8($out)
4047bded2dbSJung-uk Kim	lwz		r12,12($out)
4057bded2dbSJung-uk Kim	stw		r0, 0($out)
4067bded2dbSJung-uk Kim	stw		r6, 4($out)
4077bded2dbSJung-uk Kim	stw		r7, 8($out)
4087bded2dbSJung-uk Kim	stw		r8, 12($out)
4097bded2dbSJung-uk Kim	subi		$out,$out,16
4107bded2dbSJung-uk Kim	stw		r9, -16($inp)
4117bded2dbSJung-uk Kim	stw		r10,-12($inp)
4127bded2dbSJung-uk Kim	stw		r11,-8($inp)
4137bded2dbSJung-uk Kim	stw		r12,-4($inp)
4147bded2dbSJung-uk Kim	bdnz		Ldeckey
4157bded2dbSJung-uk Kim
4167bded2dbSJung-uk Kim	xor		r3,r3,r3		# return value
4177bded2dbSJung-uk KimLdec_key_abort:
4187bded2dbSJung-uk Kim	addi		$sp,$sp,$FRAME
4197bded2dbSJung-uk Kim	blr
4207bded2dbSJung-uk Kim	.long		0
4217bded2dbSJung-uk Kim	.byte		0,12,4,1,0x80,0,3,0
4227bded2dbSJung-uk Kim	.long		0
4237bded2dbSJung-uk Kim.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
4247bded2dbSJung-uk Kim___
4257bded2dbSJung-uk Kim}}}
4267bded2dbSJung-uk Kim#########################################################################
4277bded2dbSJung-uk Kim{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) appends the single-block routine .${prefix}_${dir}crypt
# to $code.  $dir is "en" or "de" (see the two calls after the sub); for
# "de" the infix $n becomes "n", which turns vcipher/vcipherlast into
# vncipher/vncipherlast in the emitted text.
# Register use in the emitted routine: r3 = input block, r4 = output block,
# r5 = key schedule, r6 = round count read from offset 240 of the key
# schedule, r7 = byte index into the schedule / unaligned-access offset.
# Input, output and key schedule may all be unaligned; each is handled
# with a permute vector (v2, v3 and v5 respectively).
# NOTE(review): the sub is declared with an empty prototype yet reads one
# argument; this only works because both calls use the &name(...) form,
# which bypasses prototype checking.
4287bded2dbSJung-uk Kimsub gen_block () {
4297bded2dbSJung-uk Kimmy $dir = shift;
4307bded2dbSJung-uk Kimmy $n   = $dir eq "de" ? "n" : "";
4317bded2dbSJung-uk Kimmy ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
4327bded2dbSJung-uk Kim
4337bded2dbSJung-uk Kim$code.=<<___;
4347bded2dbSJung-uk Kim.globl	.${prefix}_${dir}crypt
4357bded2dbSJung-uk Kim.align	5
4367bded2dbSJung-uk Kim.${prefix}_${dir}crypt:
4377bded2dbSJung-uk Kim	lwz		$rounds,240($key)
4387bded2dbSJung-uk Kim	lis		r0,0xfc00
4397bded2dbSJung-uk Kim	mfspr		$vrsave,256
4407bded2dbSJung-uk Kim	li		$idx,15			# 15 is not typo
4417bded2dbSJung-uk Kim	mtspr		256,r0
4427bded2dbSJung-uk Kim
4437bded2dbSJung-uk Kim	lvx		v0,0,$inp
4447bded2dbSJung-uk Kim	neg		r11,$out
4457bded2dbSJung-uk Kim	lvx		v1,$idx,$inp
4467bded2dbSJung-uk Kim	lvsl		v2,0,$inp		# inpperm
4477bded2dbSJung-uk Kim	le?vspltisb	v4,0x0f
4487bded2dbSJung-uk Kim	?lvsl		v3,0,r11		# outperm
4497bded2dbSJung-uk Kim	le?vxor		v2,v2,v4
4507bded2dbSJung-uk Kim	li		$idx,16
4517bded2dbSJung-uk Kim	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
4527bded2dbSJung-uk Kim	lvx		v1,0,$key
4537bded2dbSJung-uk Kim	?lvsl		v5,0,$key		# keyperm
4547bded2dbSJung-uk Kim	srwi		$rounds,$rounds,1
4557bded2dbSJung-uk Kim	lvx		v2,$idx,$key
4567bded2dbSJung-uk Kim	addi		$idx,$idx,16
4577bded2dbSJung-uk Kim	subi		$rounds,$rounds,1
4587bded2dbSJung-uk Kim	?vperm		v1,v1,v2,v5		# align round key
4597bded2dbSJung-uk Kim
4607bded2dbSJung-uk Kim	vxor		v0,v0,v1
4617bded2dbSJung-uk Kim	lvx		v1,$idx,$key
4627bded2dbSJung-uk Kim	addi		$idx,$idx,16
4637bded2dbSJung-uk Kim	mtctr		$rounds
4647bded2dbSJung-uk Kim
4657bded2dbSJung-uk KimLoop_${dir}c:
4667bded2dbSJung-uk Kim	?vperm		v2,v2,v1,v5
4677bded2dbSJung-uk Kim	v${n}cipher	v0,v0,v2
4687bded2dbSJung-uk Kim	lvx		v2,$idx,$key
4697bded2dbSJung-uk Kim	addi		$idx,$idx,16
4707bded2dbSJung-uk Kim	?vperm		v1,v1,v2,v5
4717bded2dbSJung-uk Kim	v${n}cipher	v0,v0,v1
4727bded2dbSJung-uk Kim	lvx		v1,$idx,$key
4737bded2dbSJung-uk Kim	addi		$idx,$idx,16
4747bded2dbSJung-uk Kim	bdnz		Loop_${dir}c
4757bded2dbSJung-uk Kim
4767bded2dbSJung-uk Kim	?vperm		v2,v2,v1,v5
4777bded2dbSJung-uk Kim	v${n}cipher	v0,v0,v2
4787bded2dbSJung-uk Kim	lvx		v2,$idx,$key
4797bded2dbSJung-uk Kim	?vperm		v1,v1,v2,v5
4807bded2dbSJung-uk Kim	v${n}cipherlast	v0,v0,v1
4817bded2dbSJung-uk Kim
4827bded2dbSJung-uk Kim	vspltisb	v2,-1
4837bded2dbSJung-uk Kim	vxor		v1,v1,v1
4847bded2dbSJung-uk Kim	li		$idx,15			# 15 is not typo
4857bded2dbSJung-uk Kim	?vperm		v2,v1,v2,v3		# outmask
4867bded2dbSJung-uk Kim	le?vxor		v3,v3,v4
4877bded2dbSJung-uk Kim	lvx		v1,0,$out		# outhead
4887bded2dbSJung-uk Kim	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
4897bded2dbSJung-uk Kim	vsel		v1,v1,v0,v2
4907bded2dbSJung-uk Kim	lvx		v4,$idx,$out
4917bded2dbSJung-uk Kim	stvx		v1,0,$out
4927bded2dbSJung-uk Kim	vsel		v0,v0,v4,v2
4937bded2dbSJung-uk Kim	stvx		v0,$idx,$out
4947bded2dbSJung-uk Kim
4957bded2dbSJung-uk Kim	mtspr		256,$vrsave
4967bded2dbSJung-uk Kim	blr
4977bded2dbSJung-uk Kim	.long		0
4987bded2dbSJung-uk Kim	.byte		0,12,0x14,0,0,0,3,0
4997bded2dbSJung-uk Kim	.long		0
5007bded2dbSJung-uk Kim.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
5017bded2dbSJung-uk Kim___
5027bded2dbSJung-uk Kim}
5037bded2dbSJung-uk Kim&gen_block("en");
5047bded2dbSJung-uk Kim&gen_block("de");
5057bded2dbSJung-uk Kim}}}
5067bded2dbSJung-uk Kim#########################################################################
5077bded2dbSJung-uk Kim{{{	# CBC en- and decrypt procedures				#
5087bded2dbSJung-uk Kimmy ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
5097bded2dbSJung-uk Kimmy ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
5107bded2dbSJung-uk Kimmy ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
5117bded2dbSJung-uk Kim						map("v$_",(4..10));
# Appends .${prefix}_cbc_encrypt to $code.  Register arguments, per the
# maps above: r3 = input, r4 = output, r5 = length in bytes,
# r6 = key schedule, r7 = iv, r8 = direction flag.  r9 holds the round
# count read from offset 240 of the key schedule, r10 a byte index.
# The routine returns at once when the length is below 16.  A zero
# direction flag selects Lcbc_dec, otherwise Lcbc_enc runs; each handles
# one 16-byte block per pass and loops while at least 16 bytes remain.
# On the decrypt side a remaining length of 128 or more branches to
# _aesp8_cbc_decrypt8x instead.
# Lcbc_done merges the pending output tail into memory and then writes
# $ivec back through $ivp using the same head/tail merge, so the iv
# buffer may be unaligned.
5127bded2dbSJung-uk Kim$code.=<<___;
5137bded2dbSJung-uk Kim.globl	.${prefix}_cbc_encrypt
5147bded2dbSJung-uk Kim.align	5
5157bded2dbSJung-uk Kim.${prefix}_cbc_encrypt:
5167bded2dbSJung-uk Kim	${UCMP}i	$len,16
5177bded2dbSJung-uk Kim	bltlr-
5187bded2dbSJung-uk Kim
5197bded2dbSJung-uk Kim	cmpwi		$enc,0			# test direction
5207bded2dbSJung-uk Kim	lis		r0,0xffe0
5217bded2dbSJung-uk Kim	mfspr		$vrsave,256
5227bded2dbSJung-uk Kim	mtspr		256,r0
5237bded2dbSJung-uk Kim
5247bded2dbSJung-uk Kim	li		$idx,15
5257bded2dbSJung-uk Kim	vxor		$rndkey0,$rndkey0,$rndkey0
5267bded2dbSJung-uk Kim	le?vspltisb	$tmp,0x0f
5277bded2dbSJung-uk Kim
5287bded2dbSJung-uk Kim	lvx		$ivec,0,$ivp		# load [unaligned] iv
5297bded2dbSJung-uk Kim	lvsl		$inpperm,0,$ivp
5307bded2dbSJung-uk Kim	lvx		$inptail,$idx,$ivp
5317bded2dbSJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
5327bded2dbSJung-uk Kim	vperm		$ivec,$ivec,$inptail,$inpperm
5337bded2dbSJung-uk Kim
5347bded2dbSJung-uk Kim	neg		r11,$inp
5357bded2dbSJung-uk Kim	?lvsl		$keyperm,0,$key		# prepare for unaligned key
5367bded2dbSJung-uk Kim	lwz		$rounds,240($key)
5377bded2dbSJung-uk Kim
5387bded2dbSJung-uk Kim	lvsr		$inpperm,0,r11		# prepare for unaligned load
5397bded2dbSJung-uk Kim	lvx		$inptail,0,$inp
5407bded2dbSJung-uk Kim	addi		$inp,$inp,15		# 15 is not typo
5417bded2dbSJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
5427bded2dbSJung-uk Kim
5437bded2dbSJung-uk Kim	?lvsr		$outperm,0,$out		# prepare for unaligned store
5447bded2dbSJung-uk Kim	vspltisb	$outmask,-1
5457bded2dbSJung-uk Kim	lvx		$outhead,0,$out
5467bded2dbSJung-uk Kim	?vperm		$outmask,$rndkey0,$outmask,$outperm
5477bded2dbSJung-uk Kim	le?vxor		$outperm,$outperm,$tmp
5487bded2dbSJung-uk Kim
5497bded2dbSJung-uk Kim	srwi		$rounds,$rounds,1
5507bded2dbSJung-uk Kim	li		$idx,16
5517bded2dbSJung-uk Kim	subi		$rounds,$rounds,1
5527bded2dbSJung-uk Kim	beq		Lcbc_dec
5537bded2dbSJung-uk Kim
5547bded2dbSJung-uk KimLcbc_enc:
5557bded2dbSJung-uk Kim	vmr		$inout,$inptail
5567bded2dbSJung-uk Kim	lvx		$inptail,0,$inp
5577bded2dbSJung-uk Kim	addi		$inp,$inp,16
5587bded2dbSJung-uk Kim	mtctr		$rounds
5597bded2dbSJung-uk Kim	subi		$len,$len,16		# len-=16
5607bded2dbSJung-uk Kim
5617bded2dbSJung-uk Kim	lvx		$rndkey0,0,$key
5627bded2dbSJung-uk Kim	 vperm		$inout,$inout,$inptail,$inpperm
5637bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
5647bded2dbSJung-uk Kim	addi		$idx,$idx,16
5657bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
5667bded2dbSJung-uk Kim	vxor		$inout,$inout,$rndkey0
5677bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
5687bded2dbSJung-uk Kim	addi		$idx,$idx,16
5697bded2dbSJung-uk Kim	vxor		$inout,$inout,$ivec
5707bded2dbSJung-uk Kim
5717bded2dbSJung-uk KimLoop_cbc_enc:
5727bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
5737bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey1
5747bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
5757bded2dbSJung-uk Kim	addi		$idx,$idx,16
5767bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
5777bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey0
5787bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
5797bded2dbSJung-uk Kim	addi		$idx,$idx,16
5807bded2dbSJung-uk Kim	bdnz		Loop_cbc_enc
5817bded2dbSJung-uk Kim
5827bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
5837bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey1
5847bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
5857bded2dbSJung-uk Kim	li		$idx,16
5867bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
5877bded2dbSJung-uk Kim	vcipherlast	$ivec,$inout,$rndkey0
5887bded2dbSJung-uk Kim	${UCMP}i	$len,16
5897bded2dbSJung-uk Kim
5907bded2dbSJung-uk Kim	vperm		$tmp,$ivec,$ivec,$outperm
5917bded2dbSJung-uk Kim	vsel		$inout,$outhead,$tmp,$outmask
5927bded2dbSJung-uk Kim	vmr		$outhead,$tmp
5937bded2dbSJung-uk Kim	stvx		$inout,0,$out
5947bded2dbSJung-uk Kim	addi		$out,$out,16
5957bded2dbSJung-uk Kim	bge		Lcbc_enc
5967bded2dbSJung-uk Kim
5977bded2dbSJung-uk Kim	b		Lcbc_done
5987bded2dbSJung-uk Kim
5997bded2dbSJung-uk Kim.align	4
6007bded2dbSJung-uk KimLcbc_dec:
6017bded2dbSJung-uk Kim	${UCMP}i	$len,128
6027bded2dbSJung-uk Kim	bge		_aesp8_cbc_decrypt8x
6037bded2dbSJung-uk Kim	vmr		$tmp,$inptail
6047bded2dbSJung-uk Kim	lvx		$inptail,0,$inp
6057bded2dbSJung-uk Kim	addi		$inp,$inp,16
6067bded2dbSJung-uk Kim	mtctr		$rounds
6077bded2dbSJung-uk Kim	subi		$len,$len,16		# len-=16
6087bded2dbSJung-uk Kim
6097bded2dbSJung-uk Kim	lvx		$rndkey0,0,$key
6107bded2dbSJung-uk Kim	 vperm		$tmp,$tmp,$inptail,$inpperm
6117bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
6127bded2dbSJung-uk Kim	addi		$idx,$idx,16
6137bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
6147bded2dbSJung-uk Kim	vxor		$inout,$tmp,$rndkey0
6157bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
6167bded2dbSJung-uk Kim	addi		$idx,$idx,16
6177bded2dbSJung-uk Kim
6187bded2dbSJung-uk KimLoop_cbc_dec:
6197bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
6207bded2dbSJung-uk Kim	vncipher	$inout,$inout,$rndkey1
6217bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
6227bded2dbSJung-uk Kim	addi		$idx,$idx,16
6237bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
6247bded2dbSJung-uk Kim	vncipher	$inout,$inout,$rndkey0
6257bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
6267bded2dbSJung-uk Kim	addi		$idx,$idx,16
6277bded2dbSJung-uk Kim	bdnz		Loop_cbc_dec
6287bded2dbSJung-uk Kim
6297bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
6307bded2dbSJung-uk Kim	vncipher	$inout,$inout,$rndkey1
6317bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
6327bded2dbSJung-uk Kim	li		$idx,16
6337bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
6347bded2dbSJung-uk Kim	vncipherlast	$inout,$inout,$rndkey0
6357bded2dbSJung-uk Kim	${UCMP}i	$len,16
6367bded2dbSJung-uk Kim
6377bded2dbSJung-uk Kim	vxor		$inout,$inout,$ivec
6387bded2dbSJung-uk Kim	vmr		$ivec,$tmp
6397bded2dbSJung-uk Kim	vperm		$tmp,$inout,$inout,$outperm
6407bded2dbSJung-uk Kim	vsel		$inout,$outhead,$tmp,$outmask
6417bded2dbSJung-uk Kim	vmr		$outhead,$tmp
6427bded2dbSJung-uk Kim	stvx		$inout,0,$out
6437bded2dbSJung-uk Kim	addi		$out,$out,16
6447bded2dbSJung-uk Kim	bge		Lcbc_dec
6457bded2dbSJung-uk Kim
6467bded2dbSJung-uk KimLcbc_done:
6477bded2dbSJung-uk Kim	addi		$out,$out,-1
6487bded2dbSJung-uk Kim	lvx		$inout,0,$out		# redundant in aligned case
6497bded2dbSJung-uk Kim	vsel		$inout,$outhead,$inout,$outmask
6507bded2dbSJung-uk Kim	stvx		$inout,0,$out
6517bded2dbSJung-uk Kim
6527bded2dbSJung-uk Kim	neg		$enc,$ivp		# write [unaligned] iv
6537bded2dbSJung-uk Kim	li		$idx,15			# 15 is not typo
6547bded2dbSJung-uk Kim	vxor		$rndkey0,$rndkey0,$rndkey0
6557bded2dbSJung-uk Kim	vspltisb	$outmask,-1
6567bded2dbSJung-uk Kim	le?vspltisb	$tmp,0x0f
6577bded2dbSJung-uk Kim	?lvsl		$outperm,0,$enc
6587bded2dbSJung-uk Kim	?vperm		$outmask,$rndkey0,$outmask,$outperm
6597bded2dbSJung-uk Kim	le?vxor		$outperm,$outperm,$tmp
6607bded2dbSJung-uk Kim	lvx		$outhead,0,$ivp
6617bded2dbSJung-uk Kim	vperm		$ivec,$ivec,$ivec,$outperm
6627bded2dbSJung-uk Kim	vsel		$inout,$outhead,$ivec,$outmask
6637bded2dbSJung-uk Kim	lvx		$inptail,$idx,$ivp
6647bded2dbSJung-uk Kim	stvx		$inout,0,$ivp
6657bded2dbSJung-uk Kim	vsel		$inout,$ivec,$inptail,$outmask
6667bded2dbSJung-uk Kim	stvx		$inout,$idx,$ivp
6677bded2dbSJung-uk Kim
6687bded2dbSJung-uk Kim	mtspr		256,$vrsave
6697bded2dbSJung-uk Kim	blr
6707bded2dbSJung-uk Kim	.long		0
6717bded2dbSJung-uk Kim	.byte		0,12,0x14,0,0,0,6,0
6727bded2dbSJung-uk Kim	.long		0
6737bded2dbSJung-uk Kim___
6747bded2dbSJung-uk Kim#########################################################################
6757bded2dbSJung-uk Kim{{	# Optimized CBC decrypt procedure				#
# Register map for _aesp8_cbc_decrypt8x, the CBC decrypt path that processes
# eight 16-byte blocks per iteration (emitted by the heredoc that follows).
#
# $key_ (r11) points at the on-stack copy of the early round keys.  The
# heredoc sets it to $sp+$FRAME+15, advances it by 0x20 per pair of rounds,
# and rewinds it to the same base before each new batch.  r11 is also used
# by name as a scratch index in the routine's prologue and epilogue.
6767bded2dbSJung-uk Kimmy $key_="r11";
# $x00..$x70 are the index operands for the byte offsets 0x00..0x70 used by
# the lvx/stvx/lvx_u/stvx_u accesses.  $x10 (r8) and $x20..$x70 (r26..r31)
# are loaded with their constants in the prologue; r26..r31 are pushed
# there and popped again in the epilogue.
# $x00 is r0, which is never loaded with zero (the prologue sets r0 to -1
# for the VRSAVE write).  It only ever appears in the RA slot of indexed
# vector loads/stores, where the Power ISA treats register 0 as the
# constant 0.  For the osx flavour a literal 0 is substituted instead
# (presumably because that assembler does not accept "r0" there; confirm).
6777bded2dbSJung-uk Kimmy ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
6787bded2dbSJung-uk Kim    $x00=0 if ($flavour =~ /osx/);
# $in0..$in7 (v0-v3, v10-v13) hold the eight ciphertext blocks of the
# current batch.  They are kept after the initial xor because, xored with
# the last round key, they serve as the vncipherlast operand for the
# following block (CBC chaining); $in7 becomes the next $ivec.
6797bded2dbSJung-uk Kimmy ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
# $out0..$out7 (v14-v21) are the eight cipher states being decrypted.
6807bded2dbSJung-uk Kimmy ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
# $rndkey0 (v23) keeps round key 0 for the whole routine.  v24/v25 are
# reloaded from the $key_ stack copy as the round loop advances, while
# v26-v31 stay loaded with the final six round keys.  v20-v31 are saved in
# the prologue and restored in the epilogue.
6817bded2dbSJung-uk Kimmy $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
6827bded2dbSJung-uk Kim			# v26-v31 last 6 round keys
# $tmp and $keyperm share v3 and v10 with $in3 and $in4.  This is safe
# because the heredoc is done with $keyperm (key schedule permutes) and
# $tmp (little-endian $inpperm fix-up) before it loads $in3/$in4 for the
# first time.  NOTE(review): the "caller" is the enclosing cbc_encrypt
# code, whose own $tmp/$keyperm declarations are outside this excerpt;
# they are presumably already v3/v10, hence "redundant"; confirm.
6837bded2dbSJung-uk Kimmy ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
6847bded2dbSJung-uk Kim
6857bded2dbSJung-uk Kim$code.=<<___;
6867bded2dbSJung-uk Kim.align	5
6877bded2dbSJung-uk Kim_aesp8_cbc_decrypt8x:
6887bded2dbSJung-uk Kim	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
6897bded2dbSJung-uk Kim	li		r10,`$FRAME+8*16+15`
6907bded2dbSJung-uk Kim	li		r11,`$FRAME+8*16+31`
6917bded2dbSJung-uk Kim	stvx		v20,r10,$sp		# ABI says so
6927bded2dbSJung-uk Kim	addi		r10,r10,32
6937bded2dbSJung-uk Kim	stvx		v21,r11,$sp
6947bded2dbSJung-uk Kim	addi		r11,r11,32
6957bded2dbSJung-uk Kim	stvx		v22,r10,$sp
6967bded2dbSJung-uk Kim	addi		r10,r10,32
6977bded2dbSJung-uk Kim	stvx		v23,r11,$sp
6987bded2dbSJung-uk Kim	addi		r11,r11,32
6997bded2dbSJung-uk Kim	stvx		v24,r10,$sp
7007bded2dbSJung-uk Kim	addi		r10,r10,32
7017bded2dbSJung-uk Kim	stvx		v25,r11,$sp
7027bded2dbSJung-uk Kim	addi		r11,r11,32
7037bded2dbSJung-uk Kim	stvx		v26,r10,$sp
7047bded2dbSJung-uk Kim	addi		r10,r10,32
7057bded2dbSJung-uk Kim	stvx		v27,r11,$sp
7067bded2dbSJung-uk Kim	addi		r11,r11,32
7077bded2dbSJung-uk Kim	stvx		v28,r10,$sp
7087bded2dbSJung-uk Kim	addi		r10,r10,32
7097bded2dbSJung-uk Kim	stvx		v29,r11,$sp
7107bded2dbSJung-uk Kim	addi		r11,r11,32
7117bded2dbSJung-uk Kim	stvx		v30,r10,$sp
7127bded2dbSJung-uk Kim	stvx		v31,r11,$sp
7137bded2dbSJung-uk Kim	li		r0,-1
7147bded2dbSJung-uk Kim	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
7157bded2dbSJung-uk Kim	li		$x10,0x10
7167bded2dbSJung-uk Kim	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
7177bded2dbSJung-uk Kim	li		$x20,0x20
7187bded2dbSJung-uk Kim	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
7197bded2dbSJung-uk Kim	li		$x30,0x30
7207bded2dbSJung-uk Kim	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
7217bded2dbSJung-uk Kim	li		$x40,0x40
7227bded2dbSJung-uk Kim	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
7237bded2dbSJung-uk Kim	li		$x50,0x50
7247bded2dbSJung-uk Kim	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
7257bded2dbSJung-uk Kim	li		$x60,0x60
7267bded2dbSJung-uk Kim	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
7277bded2dbSJung-uk Kim	li		$x70,0x70
7287bded2dbSJung-uk Kim	mtspr		256,r0
7297bded2dbSJung-uk Kim
7307bded2dbSJung-uk Kim	subi		$rounds,$rounds,3	# -4 in total
7317bded2dbSJung-uk Kim	subi		$len,$len,128		# bias
7327bded2dbSJung-uk Kim
7337bded2dbSJung-uk Kim	lvx		$rndkey0,$x00,$key	# load key schedule
7347bded2dbSJung-uk Kim	lvx		v30,$x10,$key
7357bded2dbSJung-uk Kim	addi		$key,$key,0x20
7367bded2dbSJung-uk Kim	lvx		v31,$x00,$key
7377bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,v30,$keyperm
7387bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15
7397bded2dbSJung-uk Kim	mtctr		$rounds
7407bded2dbSJung-uk Kim
7417bded2dbSJung-uk KimLoad_cbc_dec_key:
7427bded2dbSJung-uk Kim	?vperm		v24,v30,v31,$keyperm
7437bded2dbSJung-uk Kim	lvx		v30,$x10,$key
7447bded2dbSJung-uk Kim	addi		$key,$key,0x20
7457bded2dbSJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[1]
7467bded2dbSJung-uk Kim	?vperm		v25,v31,v30,$keyperm
7477bded2dbSJung-uk Kim	lvx		v31,$x00,$key
7487bded2dbSJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[2]
7497bded2dbSJung-uk Kim	addi		$key_,$key_,0x20
7507bded2dbSJung-uk Kim	bdnz		Load_cbc_dec_key
7517bded2dbSJung-uk Kim
7527bded2dbSJung-uk Kim	lvx		v26,$x10,$key
7537bded2dbSJung-uk Kim	?vperm		v24,v30,v31,$keyperm
7547bded2dbSJung-uk Kim	lvx		v27,$x20,$key
7557bded2dbSJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[3]
7567bded2dbSJung-uk Kim	?vperm		v25,v31,v26,$keyperm
7577bded2dbSJung-uk Kim	lvx		v28,$x30,$key
7587bded2dbSJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[4]
7597bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
7607bded2dbSJung-uk Kim	?vperm		v26,v26,v27,$keyperm
7617bded2dbSJung-uk Kim	lvx		v29,$x40,$key
7627bded2dbSJung-uk Kim	?vperm		v27,v27,v28,$keyperm
7637bded2dbSJung-uk Kim	lvx		v30,$x50,$key
7647bded2dbSJung-uk Kim	?vperm		v28,v28,v29,$keyperm
7657bded2dbSJung-uk Kim	lvx		v31,$x60,$key
7667bded2dbSJung-uk Kim	?vperm		v29,v29,v30,$keyperm
7677bded2dbSJung-uk Kim	lvx		$out0,$x70,$key		# borrow $out0
7687bded2dbSJung-uk Kim	?vperm		v30,v30,v31,$keyperm
7697bded2dbSJung-uk Kim	lvx		v24,$x00,$key_		# pre-load round[1]
7707bded2dbSJung-uk Kim	?vperm		v31,v31,$out0,$keyperm
7717bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# pre-load round[2]
7727bded2dbSJung-uk Kim
7737bded2dbSJung-uk Kim	#lvx		$inptail,0,$inp		# "caller" already did this
7747bded2dbSJung-uk Kim	#addi		$inp,$inp,15		# 15 is not typo
7757bded2dbSJung-uk Kim	subi		$inp,$inp,15		# undo "caller"
7767bded2dbSJung-uk Kim
7777bded2dbSJung-uk Kim	 le?li		$idx,8
7787bded2dbSJung-uk Kim	lvx_u		$in0,$x00,$inp		# load first 8 "words"
7797bded2dbSJung-uk Kim	 le?lvsl	$inpperm,0,$idx
7807bded2dbSJung-uk Kim	 le?vspltisb	$tmp,0x0f
7817bded2dbSJung-uk Kim	lvx_u		$in1,$x10,$inp
7827bded2dbSJung-uk Kim	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
7837bded2dbSJung-uk Kim	lvx_u		$in2,$x20,$inp
7847bded2dbSJung-uk Kim	 le?vperm	$in0,$in0,$in0,$inpperm
7857bded2dbSJung-uk Kim	lvx_u		$in3,$x30,$inp
7867bded2dbSJung-uk Kim	 le?vperm	$in1,$in1,$in1,$inpperm
7877bded2dbSJung-uk Kim	lvx_u		$in4,$x40,$inp
7887bded2dbSJung-uk Kim	 le?vperm	$in2,$in2,$in2,$inpperm
7897bded2dbSJung-uk Kim	vxor		$out0,$in0,$rndkey0
7907bded2dbSJung-uk Kim	lvx_u		$in5,$x50,$inp
7917bded2dbSJung-uk Kim	 le?vperm	$in3,$in3,$in3,$inpperm
7927bded2dbSJung-uk Kim	vxor		$out1,$in1,$rndkey0
7937bded2dbSJung-uk Kim	lvx_u		$in6,$x60,$inp
7947bded2dbSJung-uk Kim	 le?vperm	$in4,$in4,$in4,$inpperm
7957bded2dbSJung-uk Kim	vxor		$out2,$in2,$rndkey0
7967bded2dbSJung-uk Kim	lvx_u		$in7,$x70,$inp
7977bded2dbSJung-uk Kim	addi		$inp,$inp,0x80
7987bded2dbSJung-uk Kim	 le?vperm	$in5,$in5,$in5,$inpperm
7997bded2dbSJung-uk Kim	vxor		$out3,$in3,$rndkey0
8007bded2dbSJung-uk Kim	 le?vperm	$in6,$in6,$in6,$inpperm
8017bded2dbSJung-uk Kim	vxor		$out4,$in4,$rndkey0
8027bded2dbSJung-uk Kim	 le?vperm	$in7,$in7,$in7,$inpperm
8037bded2dbSJung-uk Kim	vxor		$out5,$in5,$rndkey0
8047bded2dbSJung-uk Kim	vxor		$out6,$in6,$rndkey0
8057bded2dbSJung-uk Kim	vxor		$out7,$in7,$rndkey0
8067bded2dbSJung-uk Kim
8077bded2dbSJung-uk Kim	mtctr		$rounds
8087bded2dbSJung-uk Kim	b		Loop_cbc_dec8x
8097bded2dbSJung-uk Kim.align	5
8107bded2dbSJung-uk KimLoop_cbc_dec8x:
8117bded2dbSJung-uk Kim	vncipher	$out0,$out0,v24
8127bded2dbSJung-uk Kim	vncipher	$out1,$out1,v24
8137bded2dbSJung-uk Kim	vncipher	$out2,$out2,v24
8147bded2dbSJung-uk Kim	vncipher	$out3,$out3,v24
8157bded2dbSJung-uk Kim	vncipher	$out4,$out4,v24
8167bded2dbSJung-uk Kim	vncipher	$out5,$out5,v24
8177bded2dbSJung-uk Kim	vncipher	$out6,$out6,v24
8187bded2dbSJung-uk Kim	vncipher	$out7,$out7,v24
8197bded2dbSJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
8207bded2dbSJung-uk Kim	addi		$key_,$key_,0x20
8217bded2dbSJung-uk Kim
8227bded2dbSJung-uk Kim	vncipher	$out0,$out0,v25
8237bded2dbSJung-uk Kim	vncipher	$out1,$out1,v25
8247bded2dbSJung-uk Kim	vncipher	$out2,$out2,v25
8257bded2dbSJung-uk Kim	vncipher	$out3,$out3,v25
8267bded2dbSJung-uk Kim	vncipher	$out4,$out4,v25
8277bded2dbSJung-uk Kim	vncipher	$out5,$out5,v25
8287bded2dbSJung-uk Kim	vncipher	$out6,$out6,v25
8297bded2dbSJung-uk Kim	vncipher	$out7,$out7,v25
8307bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
8317bded2dbSJung-uk Kim	bdnz		Loop_cbc_dec8x
8327bded2dbSJung-uk Kim
8337bded2dbSJung-uk Kim	subic		$len,$len,128		# $len-=128
8347bded2dbSJung-uk Kim	vncipher	$out0,$out0,v24
8357bded2dbSJung-uk Kim	vncipher	$out1,$out1,v24
8367bded2dbSJung-uk Kim	vncipher	$out2,$out2,v24
8377bded2dbSJung-uk Kim	vncipher	$out3,$out3,v24
8387bded2dbSJung-uk Kim	vncipher	$out4,$out4,v24
8397bded2dbSJung-uk Kim	vncipher	$out5,$out5,v24
8407bded2dbSJung-uk Kim	vncipher	$out6,$out6,v24
8417bded2dbSJung-uk Kim	vncipher	$out7,$out7,v24
8427bded2dbSJung-uk Kim
8437bded2dbSJung-uk Kim	subfe.		r0,r0,r0		# borrow?-1:0
8447bded2dbSJung-uk Kim	vncipher	$out0,$out0,v25
8457bded2dbSJung-uk Kim	vncipher	$out1,$out1,v25
8467bded2dbSJung-uk Kim	vncipher	$out2,$out2,v25
8477bded2dbSJung-uk Kim	vncipher	$out3,$out3,v25
8487bded2dbSJung-uk Kim	vncipher	$out4,$out4,v25
8497bded2dbSJung-uk Kim	vncipher	$out5,$out5,v25
8507bded2dbSJung-uk Kim	vncipher	$out6,$out6,v25
8517bded2dbSJung-uk Kim	vncipher	$out7,$out7,v25
8527bded2dbSJung-uk Kim
8537bded2dbSJung-uk Kim	and		r0,r0,$len
8547bded2dbSJung-uk Kim	vncipher	$out0,$out0,v26
8557bded2dbSJung-uk Kim	vncipher	$out1,$out1,v26
8567bded2dbSJung-uk Kim	vncipher	$out2,$out2,v26
8577bded2dbSJung-uk Kim	vncipher	$out3,$out3,v26
8587bded2dbSJung-uk Kim	vncipher	$out4,$out4,v26
8597bded2dbSJung-uk Kim	vncipher	$out5,$out5,v26
8607bded2dbSJung-uk Kim	vncipher	$out6,$out6,v26
8617bded2dbSJung-uk Kim	vncipher	$out7,$out7,v26
8627bded2dbSJung-uk Kim
8637bded2dbSJung-uk Kim	add		$inp,$inp,r0		# $inp is adjusted in such
8647bded2dbSJung-uk Kim						# way that at exit from the
8657bded2dbSJung-uk Kim						# loop inX-in7 are loaded
8667bded2dbSJung-uk Kim						# with last "words"
8677bded2dbSJung-uk Kim	vncipher	$out0,$out0,v27
8687bded2dbSJung-uk Kim	vncipher	$out1,$out1,v27
8697bded2dbSJung-uk Kim	vncipher	$out2,$out2,v27
8707bded2dbSJung-uk Kim	vncipher	$out3,$out3,v27
8717bded2dbSJung-uk Kim	vncipher	$out4,$out4,v27
8727bded2dbSJung-uk Kim	vncipher	$out5,$out5,v27
8737bded2dbSJung-uk Kim	vncipher	$out6,$out6,v27
8747bded2dbSJung-uk Kim	vncipher	$out7,$out7,v27
8757bded2dbSJung-uk Kim
8767bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
8777bded2dbSJung-uk Kim	vncipher	$out0,$out0,v28
8787bded2dbSJung-uk Kim	vncipher	$out1,$out1,v28
8797bded2dbSJung-uk Kim	vncipher	$out2,$out2,v28
8807bded2dbSJung-uk Kim	vncipher	$out3,$out3,v28
8817bded2dbSJung-uk Kim	vncipher	$out4,$out4,v28
8827bded2dbSJung-uk Kim	vncipher	$out5,$out5,v28
8837bded2dbSJung-uk Kim	vncipher	$out6,$out6,v28
8847bded2dbSJung-uk Kim	vncipher	$out7,$out7,v28
8857bded2dbSJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
8867bded2dbSJung-uk Kim
8877bded2dbSJung-uk Kim	vncipher	$out0,$out0,v29
8887bded2dbSJung-uk Kim	vncipher	$out1,$out1,v29
8897bded2dbSJung-uk Kim	vncipher	$out2,$out2,v29
8907bded2dbSJung-uk Kim	vncipher	$out3,$out3,v29
8917bded2dbSJung-uk Kim	vncipher	$out4,$out4,v29
8927bded2dbSJung-uk Kim	vncipher	$out5,$out5,v29
8937bded2dbSJung-uk Kim	vncipher	$out6,$out6,v29
8947bded2dbSJung-uk Kim	vncipher	$out7,$out7,v29
8957bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
8967bded2dbSJung-uk Kim
8977bded2dbSJung-uk Kim	vncipher	$out0,$out0,v30
8987bded2dbSJung-uk Kim	 vxor		$ivec,$ivec,v31		# xor with last round key
8997bded2dbSJung-uk Kim	vncipher	$out1,$out1,v30
9007bded2dbSJung-uk Kim	 vxor		$in0,$in0,v31
9017bded2dbSJung-uk Kim	vncipher	$out2,$out2,v30
9027bded2dbSJung-uk Kim	 vxor		$in1,$in1,v31
9037bded2dbSJung-uk Kim	vncipher	$out3,$out3,v30
9047bded2dbSJung-uk Kim	 vxor		$in2,$in2,v31
9057bded2dbSJung-uk Kim	vncipher	$out4,$out4,v30
9067bded2dbSJung-uk Kim	 vxor		$in3,$in3,v31
9077bded2dbSJung-uk Kim	vncipher	$out5,$out5,v30
9087bded2dbSJung-uk Kim	 vxor		$in4,$in4,v31
9097bded2dbSJung-uk Kim	vncipher	$out6,$out6,v30
9107bded2dbSJung-uk Kim	 vxor		$in5,$in5,v31
9117bded2dbSJung-uk Kim	vncipher	$out7,$out7,v30
9127bded2dbSJung-uk Kim	 vxor		$in6,$in6,v31
9137bded2dbSJung-uk Kim
9147bded2dbSJung-uk Kim	vncipherlast	$out0,$out0,$ivec
9157bded2dbSJung-uk Kim	vncipherlast	$out1,$out1,$in0
9167bded2dbSJung-uk Kim	 lvx_u		$in0,$x00,$inp		# load next input block
9177bded2dbSJung-uk Kim	vncipherlast	$out2,$out2,$in1
9187bded2dbSJung-uk Kim	 lvx_u		$in1,$x10,$inp
9197bded2dbSJung-uk Kim	vncipherlast	$out3,$out3,$in2
9207bded2dbSJung-uk Kim	 le?vperm	$in0,$in0,$in0,$inpperm
9217bded2dbSJung-uk Kim	 lvx_u		$in2,$x20,$inp
9227bded2dbSJung-uk Kim	vncipherlast	$out4,$out4,$in3
9237bded2dbSJung-uk Kim	 le?vperm	$in1,$in1,$in1,$inpperm
9247bded2dbSJung-uk Kim	 lvx_u		$in3,$x30,$inp
9257bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$in4
9267bded2dbSJung-uk Kim	 le?vperm	$in2,$in2,$in2,$inpperm
9277bded2dbSJung-uk Kim	 lvx_u		$in4,$x40,$inp
9287bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
9297bded2dbSJung-uk Kim	 le?vperm	$in3,$in3,$in3,$inpperm
9307bded2dbSJung-uk Kim	 lvx_u		$in5,$x50,$inp
9317bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
9327bded2dbSJung-uk Kim	 le?vperm	$in4,$in4,$in4,$inpperm
9337bded2dbSJung-uk Kim	 lvx_u		$in6,$x60,$inp
9347bded2dbSJung-uk Kim	vmr		$ivec,$in7
9357bded2dbSJung-uk Kim	 le?vperm	$in5,$in5,$in5,$inpperm
9367bded2dbSJung-uk Kim	 lvx_u		$in7,$x70,$inp
9377bded2dbSJung-uk Kim	 addi		$inp,$inp,0x80
9387bded2dbSJung-uk Kim
9397bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
9407bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
9417bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
9427bded2dbSJung-uk Kim	 le?vperm	$in6,$in6,$in6,$inpperm
9437bded2dbSJung-uk Kim	 vxor		$out0,$in0,$rndkey0
9447bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
9457bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
9467bded2dbSJung-uk Kim	 le?vperm	$in7,$in7,$in7,$inpperm
9477bded2dbSJung-uk Kim	 vxor		$out1,$in1,$rndkey0
9487bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
9497bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
9507bded2dbSJung-uk Kim	 vxor		$out2,$in2,$rndkey0
9517bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
9527bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
9537bded2dbSJung-uk Kim	 vxor		$out3,$in3,$rndkey0
9547bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
9557bded2dbSJung-uk Kim	stvx_u		$out4,$x40,$out
9567bded2dbSJung-uk Kim	 vxor		$out4,$in4,$rndkey0
9577bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
9587bded2dbSJung-uk Kim	stvx_u		$out5,$x50,$out
9597bded2dbSJung-uk Kim	 vxor		$out5,$in5,$rndkey0
9607bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
9617bded2dbSJung-uk Kim	stvx_u		$out6,$x60,$out
9627bded2dbSJung-uk Kim	 vxor		$out6,$in6,$rndkey0
9637bded2dbSJung-uk Kim	stvx_u		$out7,$x70,$out
9647bded2dbSJung-uk Kim	addi		$out,$out,0x80
9657bded2dbSJung-uk Kim	 vxor		$out7,$in7,$rndkey0
9667bded2dbSJung-uk Kim
9677bded2dbSJung-uk Kim	mtctr		$rounds
9687bded2dbSJung-uk Kim	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
9697bded2dbSJung-uk Kim
9707bded2dbSJung-uk Kim	addic.		$len,$len,128
9717bded2dbSJung-uk Kim	beq		Lcbc_dec8x_done
9727bded2dbSJung-uk Kim	nop
9737bded2dbSJung-uk Kim	nop
9747bded2dbSJung-uk Kim
9757bded2dbSJung-uk KimLoop_cbc_dec8x_tail:				# up to 7 "words" tail...
9767bded2dbSJung-uk Kim	vncipher	$out1,$out1,v24
9777bded2dbSJung-uk Kim	vncipher	$out2,$out2,v24
9787bded2dbSJung-uk Kim	vncipher	$out3,$out3,v24
9797bded2dbSJung-uk Kim	vncipher	$out4,$out4,v24
9807bded2dbSJung-uk Kim	vncipher	$out5,$out5,v24
9817bded2dbSJung-uk Kim	vncipher	$out6,$out6,v24
9827bded2dbSJung-uk Kim	vncipher	$out7,$out7,v24
9837bded2dbSJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
9847bded2dbSJung-uk Kim	addi		$key_,$key_,0x20
9857bded2dbSJung-uk Kim
9867bded2dbSJung-uk Kim	vncipher	$out1,$out1,v25
9877bded2dbSJung-uk Kim	vncipher	$out2,$out2,v25
9887bded2dbSJung-uk Kim	vncipher	$out3,$out3,v25
9897bded2dbSJung-uk Kim	vncipher	$out4,$out4,v25
9907bded2dbSJung-uk Kim	vncipher	$out5,$out5,v25
9917bded2dbSJung-uk Kim	vncipher	$out6,$out6,v25
9927bded2dbSJung-uk Kim	vncipher	$out7,$out7,v25
9937bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
9947bded2dbSJung-uk Kim	bdnz		Loop_cbc_dec8x_tail
9957bded2dbSJung-uk Kim
9967bded2dbSJung-uk Kim	vncipher	$out1,$out1,v24
9977bded2dbSJung-uk Kim	vncipher	$out2,$out2,v24
9987bded2dbSJung-uk Kim	vncipher	$out3,$out3,v24
9997bded2dbSJung-uk Kim	vncipher	$out4,$out4,v24
10007bded2dbSJung-uk Kim	vncipher	$out5,$out5,v24
10017bded2dbSJung-uk Kim	vncipher	$out6,$out6,v24
10027bded2dbSJung-uk Kim	vncipher	$out7,$out7,v24
10037bded2dbSJung-uk Kim
10047bded2dbSJung-uk Kim	vncipher	$out1,$out1,v25
10057bded2dbSJung-uk Kim	vncipher	$out2,$out2,v25
10067bded2dbSJung-uk Kim	vncipher	$out3,$out3,v25
10077bded2dbSJung-uk Kim	vncipher	$out4,$out4,v25
10087bded2dbSJung-uk Kim	vncipher	$out5,$out5,v25
10097bded2dbSJung-uk Kim	vncipher	$out6,$out6,v25
10107bded2dbSJung-uk Kim	vncipher	$out7,$out7,v25
10117bded2dbSJung-uk Kim
10127bded2dbSJung-uk Kim	vncipher	$out1,$out1,v26
10137bded2dbSJung-uk Kim	vncipher	$out2,$out2,v26
10147bded2dbSJung-uk Kim	vncipher	$out3,$out3,v26
10157bded2dbSJung-uk Kim	vncipher	$out4,$out4,v26
10167bded2dbSJung-uk Kim	vncipher	$out5,$out5,v26
10177bded2dbSJung-uk Kim	vncipher	$out6,$out6,v26
10187bded2dbSJung-uk Kim	vncipher	$out7,$out7,v26
10197bded2dbSJung-uk Kim
10207bded2dbSJung-uk Kim	vncipher	$out1,$out1,v27
10217bded2dbSJung-uk Kim	vncipher	$out2,$out2,v27
10227bded2dbSJung-uk Kim	vncipher	$out3,$out3,v27
10237bded2dbSJung-uk Kim	vncipher	$out4,$out4,v27
10247bded2dbSJung-uk Kim	vncipher	$out5,$out5,v27
10257bded2dbSJung-uk Kim	vncipher	$out6,$out6,v27
10267bded2dbSJung-uk Kim	vncipher	$out7,$out7,v27
10277bded2dbSJung-uk Kim
10287bded2dbSJung-uk Kim	vncipher	$out1,$out1,v28
10297bded2dbSJung-uk Kim	vncipher	$out2,$out2,v28
10307bded2dbSJung-uk Kim	vncipher	$out3,$out3,v28
10317bded2dbSJung-uk Kim	vncipher	$out4,$out4,v28
10327bded2dbSJung-uk Kim	vncipher	$out5,$out5,v28
10337bded2dbSJung-uk Kim	vncipher	$out6,$out6,v28
10347bded2dbSJung-uk Kim	vncipher	$out7,$out7,v28
10357bded2dbSJung-uk Kim
10367bded2dbSJung-uk Kim	vncipher	$out1,$out1,v29
10377bded2dbSJung-uk Kim	vncipher	$out2,$out2,v29
10387bded2dbSJung-uk Kim	vncipher	$out3,$out3,v29
10397bded2dbSJung-uk Kim	vncipher	$out4,$out4,v29
10407bded2dbSJung-uk Kim	vncipher	$out5,$out5,v29
10417bded2dbSJung-uk Kim	vncipher	$out6,$out6,v29
10427bded2dbSJung-uk Kim	vncipher	$out7,$out7,v29
10437bded2dbSJung-uk Kim
10447bded2dbSJung-uk Kim	vncipher	$out1,$out1,v30
10457bded2dbSJung-uk Kim	 vxor		$ivec,$ivec,v31		# last round key
10467bded2dbSJung-uk Kim	vncipher	$out2,$out2,v30
10477bded2dbSJung-uk Kim	 vxor		$in1,$in1,v31
10487bded2dbSJung-uk Kim	vncipher	$out3,$out3,v30
10497bded2dbSJung-uk Kim	 vxor		$in2,$in2,v31
10507bded2dbSJung-uk Kim	vncipher	$out4,$out4,v30
10517bded2dbSJung-uk Kim	 vxor		$in3,$in3,v31
10527bded2dbSJung-uk Kim	vncipher	$out5,$out5,v30
10537bded2dbSJung-uk Kim	 vxor		$in4,$in4,v31
10547bded2dbSJung-uk Kim	vncipher	$out6,$out6,v30
10557bded2dbSJung-uk Kim	 vxor		$in5,$in5,v31
10567bded2dbSJung-uk Kim	vncipher	$out7,$out7,v30
10577bded2dbSJung-uk Kim	 vxor		$in6,$in6,v31
10587bded2dbSJung-uk Kim
10597bded2dbSJung-uk Kim	cmplwi		$len,32			# switch($len)
10607bded2dbSJung-uk Kim	blt		Lcbc_dec8x_one
10617bded2dbSJung-uk Kim	nop
10627bded2dbSJung-uk Kim	beq		Lcbc_dec8x_two
10637bded2dbSJung-uk Kim	cmplwi		$len,64
10647bded2dbSJung-uk Kim	blt		Lcbc_dec8x_three
10657bded2dbSJung-uk Kim	nop
10667bded2dbSJung-uk Kim	beq		Lcbc_dec8x_four
10677bded2dbSJung-uk Kim	cmplwi		$len,96
10687bded2dbSJung-uk Kim	blt		Lcbc_dec8x_five
10697bded2dbSJung-uk Kim	nop
10707bded2dbSJung-uk Kim	beq		Lcbc_dec8x_six
10717bded2dbSJung-uk Kim
10727bded2dbSJung-uk KimLcbc_dec8x_seven:
10737bded2dbSJung-uk Kim	vncipherlast	$out1,$out1,$ivec
10747bded2dbSJung-uk Kim	vncipherlast	$out2,$out2,$in1
10757bded2dbSJung-uk Kim	vncipherlast	$out3,$out3,$in2
10767bded2dbSJung-uk Kim	vncipherlast	$out4,$out4,$in3
10777bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$in4
10787bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
10797bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
10807bded2dbSJung-uk Kim	vmr		$ivec,$in7
10817bded2dbSJung-uk Kim
10827bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
10837bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
10847bded2dbSJung-uk Kim	stvx_u		$out1,$x00,$out
10857bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
10867bded2dbSJung-uk Kim	stvx_u		$out2,$x10,$out
10877bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
10887bded2dbSJung-uk Kim	stvx_u		$out3,$x20,$out
10897bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
10907bded2dbSJung-uk Kim	stvx_u		$out4,$x30,$out
10917bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
10927bded2dbSJung-uk Kim	stvx_u		$out5,$x40,$out
10937bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
10947bded2dbSJung-uk Kim	stvx_u		$out6,$x50,$out
10957bded2dbSJung-uk Kim	stvx_u		$out7,$x60,$out
10967bded2dbSJung-uk Kim	addi		$out,$out,0x70
10977bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
10987bded2dbSJung-uk Kim
10997bded2dbSJung-uk Kim.align	5
11007bded2dbSJung-uk KimLcbc_dec8x_six:
11017bded2dbSJung-uk Kim	vncipherlast	$out2,$out2,$ivec
11027bded2dbSJung-uk Kim	vncipherlast	$out3,$out3,$in2
11037bded2dbSJung-uk Kim	vncipherlast	$out4,$out4,$in3
11047bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$in4
11057bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
11067bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
11077bded2dbSJung-uk Kim	vmr		$ivec,$in7
11087bded2dbSJung-uk Kim
11097bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
11107bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
11117bded2dbSJung-uk Kim	stvx_u		$out2,$x00,$out
11127bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
11137bded2dbSJung-uk Kim	stvx_u		$out3,$x10,$out
11147bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
11157bded2dbSJung-uk Kim	stvx_u		$out4,$x20,$out
11167bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
11177bded2dbSJung-uk Kim	stvx_u		$out5,$x30,$out
11187bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
11197bded2dbSJung-uk Kim	stvx_u		$out6,$x40,$out
11207bded2dbSJung-uk Kim	stvx_u		$out7,$x50,$out
11217bded2dbSJung-uk Kim	addi		$out,$out,0x60
11227bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
11237bded2dbSJung-uk Kim
11247bded2dbSJung-uk Kim.align	5
11257bded2dbSJung-uk KimLcbc_dec8x_five:
11267bded2dbSJung-uk Kim	vncipherlast	$out3,$out3,$ivec
11277bded2dbSJung-uk Kim	vncipherlast	$out4,$out4,$in3
11287bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$in4
11297bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
11307bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
11317bded2dbSJung-uk Kim	vmr		$ivec,$in7
11327bded2dbSJung-uk Kim
11337bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
11347bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
11357bded2dbSJung-uk Kim	stvx_u		$out3,$x00,$out
11367bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
11377bded2dbSJung-uk Kim	stvx_u		$out4,$x10,$out
11387bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
11397bded2dbSJung-uk Kim	stvx_u		$out5,$x20,$out
11407bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
11417bded2dbSJung-uk Kim	stvx_u		$out6,$x30,$out
11427bded2dbSJung-uk Kim	stvx_u		$out7,$x40,$out
11437bded2dbSJung-uk Kim	addi		$out,$out,0x50
11447bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
11457bded2dbSJung-uk Kim
11467bded2dbSJung-uk Kim.align	5
11477bded2dbSJung-uk KimLcbc_dec8x_four:
11487bded2dbSJung-uk Kim	vncipherlast	$out4,$out4,$ivec
11497bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$in4
11507bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
11517bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
11527bded2dbSJung-uk Kim	vmr		$ivec,$in7
11537bded2dbSJung-uk Kim
11547bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
11557bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
11567bded2dbSJung-uk Kim	stvx_u		$out4,$x00,$out
11577bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
11587bded2dbSJung-uk Kim	stvx_u		$out5,$x10,$out
11597bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
11607bded2dbSJung-uk Kim	stvx_u		$out6,$x20,$out
11617bded2dbSJung-uk Kim	stvx_u		$out7,$x30,$out
11627bded2dbSJung-uk Kim	addi		$out,$out,0x40
11637bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
11647bded2dbSJung-uk Kim
11657bded2dbSJung-uk Kim.align	5
11667bded2dbSJung-uk KimLcbc_dec8x_three:
11677bded2dbSJung-uk Kim	vncipherlast	$out5,$out5,$ivec
11687bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$in5
11697bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
11707bded2dbSJung-uk Kim	vmr		$ivec,$in7
11717bded2dbSJung-uk Kim
11727bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
11737bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
11747bded2dbSJung-uk Kim	stvx_u		$out5,$x00,$out
11757bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
11767bded2dbSJung-uk Kim	stvx_u		$out6,$x10,$out
11777bded2dbSJung-uk Kim	stvx_u		$out7,$x20,$out
11787bded2dbSJung-uk Kim	addi		$out,$out,0x30
11797bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
11807bded2dbSJung-uk Kim
11817bded2dbSJung-uk Kim.align	5
11827bded2dbSJung-uk KimLcbc_dec8x_two:
11837bded2dbSJung-uk Kim	vncipherlast	$out6,$out6,$ivec
11847bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$in6
11857bded2dbSJung-uk Kim	vmr		$ivec,$in7
11867bded2dbSJung-uk Kim
11877bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
11887bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
11897bded2dbSJung-uk Kim	stvx_u		$out6,$x00,$out
11907bded2dbSJung-uk Kim	stvx_u		$out7,$x10,$out
11917bded2dbSJung-uk Kim	addi		$out,$out,0x20
11927bded2dbSJung-uk Kim	b		Lcbc_dec8x_done
11937bded2dbSJung-uk Kim
11947bded2dbSJung-uk Kim.align	5
11957bded2dbSJung-uk KimLcbc_dec8x_one:
11967bded2dbSJung-uk Kim	vncipherlast	$out7,$out7,$ivec
11977bded2dbSJung-uk Kim	vmr		$ivec,$in7
11987bded2dbSJung-uk Kim
11997bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
12007bded2dbSJung-uk Kim	stvx_u		$out7,0,$out
12017bded2dbSJung-uk Kim	addi		$out,$out,0x10
12027bded2dbSJung-uk Kim
12037bded2dbSJung-uk KimLcbc_dec8x_done:
12047bded2dbSJung-uk Kim	le?vperm	$ivec,$ivec,$ivec,$inpperm
12057bded2dbSJung-uk Kim	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
12067bded2dbSJung-uk Kim
12077bded2dbSJung-uk Kim	li		r10,`$FRAME+15`
12087bded2dbSJung-uk Kim	li		r11,`$FRAME+31`
12097bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp	# wipe copies of round keys
12107bded2dbSJung-uk Kim	addi		r10,r10,32
12117bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
12127bded2dbSJung-uk Kim	addi		r11,r11,32
12137bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
12147bded2dbSJung-uk Kim	addi		r10,r10,32
12157bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
12167bded2dbSJung-uk Kim	addi		r11,r11,32
12177bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
12187bded2dbSJung-uk Kim	addi		r10,r10,32
12197bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
12207bded2dbSJung-uk Kim	addi		r11,r11,32
12217bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
12227bded2dbSJung-uk Kim	addi		r10,r10,32
12237bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
12247bded2dbSJung-uk Kim	addi		r11,r11,32
12257bded2dbSJung-uk Kim
12267bded2dbSJung-uk Kim	mtspr		256,$vrsave
12277bded2dbSJung-uk Kim	lvx		v20,r10,$sp		# ABI says so
12287bded2dbSJung-uk Kim	addi		r10,r10,32
12297bded2dbSJung-uk Kim	lvx		v21,r11,$sp
12307bded2dbSJung-uk Kim	addi		r11,r11,32
12317bded2dbSJung-uk Kim	lvx		v22,r10,$sp
12327bded2dbSJung-uk Kim	addi		r10,r10,32
12337bded2dbSJung-uk Kim	lvx		v23,r11,$sp
12347bded2dbSJung-uk Kim	addi		r11,r11,32
12357bded2dbSJung-uk Kim	lvx		v24,r10,$sp
12367bded2dbSJung-uk Kim	addi		r10,r10,32
12377bded2dbSJung-uk Kim	lvx		v25,r11,$sp
12387bded2dbSJung-uk Kim	addi		r11,r11,32
12397bded2dbSJung-uk Kim	lvx		v26,r10,$sp
12407bded2dbSJung-uk Kim	addi		r10,r10,32
12417bded2dbSJung-uk Kim	lvx		v27,r11,$sp
12427bded2dbSJung-uk Kim	addi		r11,r11,32
12437bded2dbSJung-uk Kim	lvx		v28,r10,$sp
12447bded2dbSJung-uk Kim	addi		r10,r10,32
12457bded2dbSJung-uk Kim	lvx		v29,r11,$sp
12467bded2dbSJung-uk Kim	addi		r11,r11,32
12477bded2dbSJung-uk Kim	lvx		v30,r10,$sp
12487bded2dbSJung-uk Kim	lvx		v31,r11,$sp
12497bded2dbSJung-uk Kim	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
12507bded2dbSJung-uk Kim	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
12517bded2dbSJung-uk Kim	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
12527bded2dbSJung-uk Kim	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
12537bded2dbSJung-uk Kim	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
12547bded2dbSJung-uk Kim	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
12557bded2dbSJung-uk Kim	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
12567bded2dbSJung-uk Kim	blr
12577bded2dbSJung-uk Kim	.long		0
12587bded2dbSJung-uk Kim	.byte		0,12,0x04,0,0x80,6,6,0
12597bded2dbSJung-uk Kim	.long		0
12607bded2dbSJung-uk Kim.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
12617bded2dbSJung-uk Kim___
12627bded2dbSJung-uk Kim}}	}}}
12637bded2dbSJung-uk Kim
12647bded2dbSJung-uk Kim#########################################################################
12657bded2dbSJung-uk Kim{{{	# CTR procedure[s]						#
12667bded2dbSJung-uk Kimmy ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
12677bded2dbSJung-uk Kimmy ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
12687bded2dbSJung-uk Kimmy ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
12697bded2dbSJung-uk Kim						map("v$_",(4..11));
12707bded2dbSJung-uk Kimmy $dat=$tmp;
12717bded2dbSJung-uk Kim
12727bded2dbSJung-uk Kim$code.=<<___;
12737bded2dbSJung-uk Kim.globl	.${prefix}_ctr32_encrypt_blocks
12747bded2dbSJung-uk Kim.align	5
12757bded2dbSJung-uk Kim.${prefix}_ctr32_encrypt_blocks:
12767bded2dbSJung-uk Kim	${UCMP}i	$len,1
12777bded2dbSJung-uk Kim	bltlr-
12787bded2dbSJung-uk Kim
12797bded2dbSJung-uk Kim	lis		r0,0xfff0
12807bded2dbSJung-uk Kim	mfspr		$vrsave,256
12817bded2dbSJung-uk Kim	mtspr		256,r0
12827bded2dbSJung-uk Kim
12837bded2dbSJung-uk Kim	li		$idx,15
12847bded2dbSJung-uk Kim	vxor		$rndkey0,$rndkey0,$rndkey0
12857bded2dbSJung-uk Kim	le?vspltisb	$tmp,0x0f
12867bded2dbSJung-uk Kim
12877bded2dbSJung-uk Kim	lvx		$ivec,0,$ivp		# load [unaligned] iv
12887bded2dbSJung-uk Kim	lvsl		$inpperm,0,$ivp
12897bded2dbSJung-uk Kim	lvx		$inptail,$idx,$ivp
12907bded2dbSJung-uk Kim	 vspltisb	$one,1
12917bded2dbSJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
12927bded2dbSJung-uk Kim	vperm		$ivec,$ivec,$inptail,$inpperm
12937bded2dbSJung-uk Kim	 vsldoi		$one,$rndkey0,$one,1
12947bded2dbSJung-uk Kim
12957bded2dbSJung-uk Kim	neg		r11,$inp
12967bded2dbSJung-uk Kim	?lvsl		$keyperm,0,$key		# prepare for unaligned key
12977bded2dbSJung-uk Kim	lwz		$rounds,240($key)
12987bded2dbSJung-uk Kim
12997bded2dbSJung-uk Kim	lvsr		$inpperm,0,r11		# prepare for unaligned load
13007bded2dbSJung-uk Kim	lvx		$inptail,0,$inp
13017bded2dbSJung-uk Kim	addi		$inp,$inp,15		# 15 is not typo
13027bded2dbSJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
13037bded2dbSJung-uk Kim
13047bded2dbSJung-uk Kim	srwi		$rounds,$rounds,1
13057bded2dbSJung-uk Kim	li		$idx,16
13067bded2dbSJung-uk Kim	subi		$rounds,$rounds,1
13077bded2dbSJung-uk Kim
13087bded2dbSJung-uk Kim	${UCMP}i	$len,8
13097bded2dbSJung-uk Kim	bge		_aesp8_ctr32_encrypt8x
13107bded2dbSJung-uk Kim
13117bded2dbSJung-uk Kim	?lvsr		$outperm,0,$out		# prepare for unaligned store
13127bded2dbSJung-uk Kim	vspltisb	$outmask,-1
13137bded2dbSJung-uk Kim	lvx		$outhead,0,$out
13147bded2dbSJung-uk Kim	?vperm		$outmask,$rndkey0,$outmask,$outperm
13157bded2dbSJung-uk Kim	le?vxor		$outperm,$outperm,$tmp
13167bded2dbSJung-uk Kim
13177bded2dbSJung-uk Kim	lvx		$rndkey0,0,$key
13187bded2dbSJung-uk Kim	mtctr		$rounds
13197bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
13207bded2dbSJung-uk Kim	addi		$idx,$idx,16
13217bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
13227bded2dbSJung-uk Kim	vxor		$inout,$ivec,$rndkey0
13237bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
13247bded2dbSJung-uk Kim	addi		$idx,$idx,16
13257bded2dbSJung-uk Kim	b		Loop_ctr32_enc
13267bded2dbSJung-uk Kim
13277bded2dbSJung-uk Kim.align	5
13287bded2dbSJung-uk KimLoop_ctr32_enc:
13297bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
13307bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey1
13317bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
13327bded2dbSJung-uk Kim	addi		$idx,$idx,16
13337bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
13347bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey0
13357bded2dbSJung-uk Kim	lvx		$rndkey0,$idx,$key
13367bded2dbSJung-uk Kim	addi		$idx,$idx,16
13377bded2dbSJung-uk Kim	bdnz		Loop_ctr32_enc
13387bded2dbSJung-uk Kim
13397bded2dbSJung-uk Kim	vadduwm		$ivec,$ivec,$one
13407bded2dbSJung-uk Kim	 vmr		$dat,$inptail
13417bded2dbSJung-uk Kim	 lvx		$inptail,0,$inp
13427bded2dbSJung-uk Kim	 addi		$inp,$inp,16
13437bded2dbSJung-uk Kim	 subic.		$len,$len,1		# blocks--
13447bded2dbSJung-uk Kim
13457bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
13467bded2dbSJung-uk Kim	vcipher		$inout,$inout,$rndkey1
13477bded2dbSJung-uk Kim	lvx		$rndkey1,$idx,$key
13487bded2dbSJung-uk Kim	 vperm		$dat,$dat,$inptail,$inpperm
13497bded2dbSJung-uk Kim	 li		$idx,16
13507bded2dbSJung-uk Kim	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
13517bded2dbSJung-uk Kim	 lvx		$rndkey0,0,$key
13527bded2dbSJung-uk Kim	vxor		$dat,$dat,$rndkey1	# last round key
13537bded2dbSJung-uk Kim	vcipherlast	$inout,$inout,$dat
13547bded2dbSJung-uk Kim
13557bded2dbSJung-uk Kim	 lvx		$rndkey1,$idx,$key
13567bded2dbSJung-uk Kim	 addi		$idx,$idx,16
13577bded2dbSJung-uk Kim	vperm		$inout,$inout,$inout,$outperm
13587bded2dbSJung-uk Kim	vsel		$dat,$outhead,$inout,$outmask
13597bded2dbSJung-uk Kim	 mtctr		$rounds
13607bded2dbSJung-uk Kim	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
13617bded2dbSJung-uk Kim	vmr		$outhead,$inout
13627bded2dbSJung-uk Kim	 vxor		$inout,$ivec,$rndkey0
13637bded2dbSJung-uk Kim	 lvx		$rndkey0,$idx,$key
13647bded2dbSJung-uk Kim	 addi		$idx,$idx,16
13657bded2dbSJung-uk Kim	stvx		$dat,0,$out
13667bded2dbSJung-uk Kim	addi		$out,$out,16
13677bded2dbSJung-uk Kim	bne		Loop_ctr32_enc
13687bded2dbSJung-uk Kim
13697bded2dbSJung-uk Kim	addi		$out,$out,-1
13707bded2dbSJung-uk Kim	lvx		$inout,0,$out		# redundant in aligned case
13717bded2dbSJung-uk Kim	vsel		$inout,$outhead,$inout,$outmask
13727bded2dbSJung-uk Kim	stvx		$inout,0,$out
13737bded2dbSJung-uk Kim
13747bded2dbSJung-uk Kim	mtspr		256,$vrsave
13757bded2dbSJung-uk Kim	blr
13767bded2dbSJung-uk Kim	.long		0
13777bded2dbSJung-uk Kim	.byte		0,12,0x14,0,0,0,6,0
13787bded2dbSJung-uk Kim	.long		0
13797bded2dbSJung-uk Kim___
13807bded2dbSJung-uk Kim#########################################################################
13817bded2dbSJung-uk Kim{{	# Optimized CTR procedure					#
# Register allocation for the eight-blocks-per-iteration CTR path
# (_aesp8_ctr32_encrypt8x, emitted by the heredoc that follows):
#   $key_        r11; pointer into the on-stack copy of the early round keys
#   $x00..$x70   index registers r0, r8, r26..r31; $x10..$x70 are loaded
#                with 0x10..0x70 in the prologue.  $x00 is only ever used
#                as the RA operand of lvx/stvx/lvx_u/stvx_u, where r0 reads
#                as zero; for "osx" flavours the literal 0 is emitted
#                instead of the register name.
#   $in0..$in7   v0-v3, v10, v12-v14; the eight input blocks
#   $out0..$out7 v15-v22; the eight counter blocks being encrypted
#   $rndkey0     v23; round key 0
# $tmp and $keyperm are re-declared on top of $in3/$in4, and $two/$three/
# $four reuse the caller's $outhead/$outperm/$outmask registers.  Only $two
# is referenced in the code emitted below.
13827bded2dbSJung-uk Kimmy $key_="r11";
13837bded2dbSJung-uk Kimmy ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
13847bded2dbSJung-uk Kim    $x00=0 if ($flavour =~ /osx/);
13857bded2dbSJung-uk Kimmy ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
13867bded2dbSJung-uk Kimmy ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
13877bded2dbSJung-uk Kimmy $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
13887bded2dbSJung-uk Kim			# v26-v31 last 6 round keys
13897bded2dbSJung-uk Kimmy ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
13907bded2dbSJung-uk Kimmy ($two,$three,$four)=($outhead,$outperm,$outmask);
13917bded2dbSJung-uk Kim
# _aesp8_ctr32_encrypt8x, prologue.  Branched to from the generic CTR entry
# code when $len (in blocks) is at least 8.
#
# Steps, in order:
#   * allocate a frame of $FRAME+21*16+6*$SIZE_T bytes;
#   * save v20-v31 starting at $FRAME+8*16 (the +15/+31 in the offsets
#     relies on stvx ignoring the low four address bits), leaving the
#     first 8*16 bytes at $FRAME for the round-key copy made later;
#   * save vrsave and r26-r31, then set vrsave to all ones;
#   * load 0x10..0x70 into $x10..$x70;
#   * reduce $rounds by 3 so that it counts iterations of the two-keys-per-
#     pass loops below;
#   * load and realign round key 0 into $rndkey0, preload the next two raw
#     key words into v30/v31, point $key_ at the stack copy and prime CTR.
13927bded2dbSJung-uk Kim$code.=<<___;
13937bded2dbSJung-uk Kim.align	5
13947bded2dbSJung-uk Kim_aesp8_ctr32_encrypt8x:
13957bded2dbSJung-uk Kim	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
13967bded2dbSJung-uk Kim	li		r10,`$FRAME+8*16+15`
13977bded2dbSJung-uk Kim	li		r11,`$FRAME+8*16+31`
13987bded2dbSJung-uk Kim	stvx		v20,r10,$sp		# ABI says so
13997bded2dbSJung-uk Kim	addi		r10,r10,32
14007bded2dbSJung-uk Kim	stvx		v21,r11,$sp
14017bded2dbSJung-uk Kim	addi		r11,r11,32
14027bded2dbSJung-uk Kim	stvx		v22,r10,$sp
14037bded2dbSJung-uk Kim	addi		r10,r10,32
14047bded2dbSJung-uk Kim	stvx		v23,r11,$sp
14057bded2dbSJung-uk Kim	addi		r11,r11,32
14067bded2dbSJung-uk Kim	stvx		v24,r10,$sp
14077bded2dbSJung-uk Kim	addi		r10,r10,32
14087bded2dbSJung-uk Kim	stvx		v25,r11,$sp
14097bded2dbSJung-uk Kim	addi		r11,r11,32
14107bded2dbSJung-uk Kim	stvx		v26,r10,$sp
14117bded2dbSJung-uk Kim	addi		r10,r10,32
14127bded2dbSJung-uk Kim	stvx		v27,r11,$sp
14137bded2dbSJung-uk Kim	addi		r11,r11,32
14147bded2dbSJung-uk Kim	stvx		v28,r10,$sp
14157bded2dbSJung-uk Kim	addi		r10,r10,32
14167bded2dbSJung-uk Kim	stvx		v29,r11,$sp
14177bded2dbSJung-uk Kim	addi		r11,r11,32
14187bded2dbSJung-uk Kim	stvx		v30,r10,$sp
14197bded2dbSJung-uk Kim	stvx		v31,r11,$sp
14207bded2dbSJung-uk Kim	li		r0,-1
14217bded2dbSJung-uk Kim	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
14227bded2dbSJung-uk Kim	li		$x10,0x10
14237bded2dbSJung-uk Kim	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
14247bded2dbSJung-uk Kim	li		$x20,0x20
14257bded2dbSJung-uk Kim	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
14267bded2dbSJung-uk Kim	li		$x30,0x30
14277bded2dbSJung-uk Kim	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
14287bded2dbSJung-uk Kim	li		$x40,0x40
14297bded2dbSJung-uk Kim	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
14307bded2dbSJung-uk Kim	li		$x50,0x50
14317bded2dbSJung-uk Kim	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
14327bded2dbSJung-uk Kim	li		$x60,0x60
14337bded2dbSJung-uk Kim	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
14347bded2dbSJung-uk Kim	li		$x70,0x70
14357bded2dbSJung-uk Kim	mtspr		256,r0
14367bded2dbSJung-uk Kim
14377bded2dbSJung-uk Kim	subi		$rounds,$rounds,3	# -4 in total
14387bded2dbSJung-uk Kim
14397bded2dbSJung-uk Kim	lvx		$rndkey0,$x00,$key	# load key schedule
14407bded2dbSJung-uk Kim	lvx		v30,$x10,$key
14417bded2dbSJung-uk Kim	addi		$key,$key,0x20
14427bded2dbSJung-uk Kim	lvx		v31,$x00,$key
14437bded2dbSJung-uk Kim	?vperm		$rndkey0,$rndkey0,v30,$keyperm
14447bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15
14457bded2dbSJung-uk Kim	mtctr		$rounds
14467bded2dbSJung-uk Kim
# Key off-load loop.  Each pass realigns two round keys through keyperm
# and stores them into the stack copy addressed by key_, while fetching
# the next two raw key words into v30/v31.
14477bded2dbSJung-uk KimLoad_ctr32_enc_key:
14487bded2dbSJung-uk Kim	?vperm		v24,v30,v31,$keyperm
14497bded2dbSJung-uk Kim	lvx		v30,$x10,$key
14507bded2dbSJung-uk Kim	addi		$key,$key,0x20
14517bded2dbSJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[1]
14527bded2dbSJung-uk Kim	?vperm		v25,v31,v30,$keyperm
14537bded2dbSJung-uk Kim	lvx		v31,$x00,$key
14547bded2dbSJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[2]
14557bded2dbSJung-uk Kim	addi		$key_,$key_,0x20
14567bded2dbSJung-uk Kim	bdnz		Load_ctr32_enc_key
14577bded2dbSJung-uk Kim
# After the loop: two more realigned keys go to the stack copy, and the
# remaining six are realigned in place and stay resident in v26-v31
# (out0 is borrowed to hold the extra raw word needed to realign v31).
# key_ is rewound and v24/v25 are preloaded with the first two stack keys.
14587bded2dbSJung-uk Kim	lvx		v26,$x10,$key
14597bded2dbSJung-uk Kim	?vperm		v24,v30,v31,$keyperm
14607bded2dbSJung-uk Kim	lvx		v27,$x20,$key
14617bded2dbSJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[3]
14627bded2dbSJung-uk Kim	?vperm		v25,v31,v26,$keyperm
14637bded2dbSJung-uk Kim	lvx		v28,$x30,$key
14647bded2dbSJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[4]
14657bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
14667bded2dbSJung-uk Kim	?vperm		v26,v26,v27,$keyperm
14677bded2dbSJung-uk Kim	lvx		v29,$x40,$key
14687bded2dbSJung-uk Kim	?vperm		v27,v27,v28,$keyperm
14697bded2dbSJung-uk Kim	lvx		v30,$x50,$key
14707bded2dbSJung-uk Kim	?vperm		v28,v28,v29,$keyperm
14717bded2dbSJung-uk Kim	lvx		v31,$x60,$key
14727bded2dbSJung-uk Kim	?vperm		v29,v29,v30,$keyperm
14737bded2dbSJung-uk Kim	lvx		$out0,$x70,$key		# borrow $out0
14747bded2dbSJung-uk Kim	?vperm		v30,v30,v31,$keyperm
14757bded2dbSJung-uk Kim	lvx		v24,$x00,$key_		# pre-load round[1]
14767bded2dbSJung-uk Kim	?vperm		v31,v31,$out0,$keyperm
14777bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# pre-load round[2]
14787bded2dbSJung-uk Kim
# Counter setup for the first batch.  "two" is one+one; out0..out7 receive
# ivec plus 0..7 increments of the caller's "one" vector, each xored with
# round key 0, and ivec itself is advanced by eight increments.
# Also: inp is moved back by the 15 the caller added, and len is shifted
# left by 4 (blocks to bytes).  The interleaved little-endian-only lines
# build the byte permutation that is applied around lvx_u/stvx_u.
14797bded2dbSJung-uk Kim	vadduwm		$two,$one,$one
14807bded2dbSJung-uk Kim	subi		$inp,$inp,15		# undo "caller"
14817bded2dbSJung-uk Kim	$SHL		$len,$len,4
14827bded2dbSJung-uk Kim
14837bded2dbSJung-uk Kim	vadduwm		$out1,$ivec,$one	# counter values ...
14847bded2dbSJung-uk Kim	vadduwm		$out2,$ivec,$two
14857bded2dbSJung-uk Kim	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
14867bded2dbSJung-uk Kim	 le?li		$idx,8
14877bded2dbSJung-uk Kim	vadduwm		$out3,$out1,$two
14887bded2dbSJung-uk Kim	vxor		$out1,$out1,$rndkey0
14897bded2dbSJung-uk Kim	 le?lvsl	$inpperm,0,$idx
14907bded2dbSJung-uk Kim	vadduwm		$out4,$out2,$two
14917bded2dbSJung-uk Kim	vxor		$out2,$out2,$rndkey0
14927bded2dbSJung-uk Kim	 le?vspltisb	$tmp,0x0f
14937bded2dbSJung-uk Kim	vadduwm		$out5,$out3,$two
14947bded2dbSJung-uk Kim	vxor		$out3,$out3,$rndkey0
14957bded2dbSJung-uk Kim	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
14967bded2dbSJung-uk Kim	vadduwm		$out6,$out4,$two
14977bded2dbSJung-uk Kim	vxor		$out4,$out4,$rndkey0
14987bded2dbSJung-uk Kim	vadduwm		$out7,$out5,$two
14997bded2dbSJung-uk Kim	vxor		$out5,$out5,$rndkey0
15007bded2dbSJung-uk Kim	vadduwm		$ivec,$out6,$two	# next counter value
15017bded2dbSJung-uk Kim	vxor		$out6,$out6,$rndkey0
15027bded2dbSJung-uk Kim	vxor		$out7,$out7,$rndkey0
15037bded2dbSJung-uk Kim
15047bded2dbSJung-uk Kim	mtctr		$rounds
15057bded2dbSJung-uk Kim	b		Loop_ctr32_enc8x
# Main round loop.  Every pass applies two round keys (v24, then v25) to
# all eight blocks and fetches the next pair from the stack copy via key_.
# Loop_ctr32_enc8x_middle is the re-entry point used after a full batch
# has been stored: at that point the v24 round has already been issued
# for the new batch, so only the second half of the pass remains.
15067bded2dbSJung-uk Kim.align	5
15077bded2dbSJung-uk KimLoop_ctr32_enc8x:
15087bded2dbSJung-uk Kim	vcipher 	$out0,$out0,v24
15097bded2dbSJung-uk Kim	vcipher 	$out1,$out1,v24
15107bded2dbSJung-uk Kim	vcipher 	$out2,$out2,v24
15117bded2dbSJung-uk Kim	vcipher 	$out3,$out3,v24
15127bded2dbSJung-uk Kim	vcipher 	$out4,$out4,v24
15137bded2dbSJung-uk Kim	vcipher 	$out5,$out5,v24
15147bded2dbSJung-uk Kim	vcipher 	$out6,$out6,v24
15157bded2dbSJung-uk Kim	vcipher 	$out7,$out7,v24
15167bded2dbSJung-uk KimLoop_ctr32_enc8x_middle:
15177bded2dbSJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
15187bded2dbSJung-uk Kim	addi		$key_,$key_,0x20
15197bded2dbSJung-uk Kim
15207bded2dbSJung-uk Kim	vcipher 	$out0,$out0,v25
15217bded2dbSJung-uk Kim	vcipher 	$out1,$out1,v25
15227bded2dbSJung-uk Kim	vcipher 	$out2,$out2,v25
15237bded2dbSJung-uk Kim	vcipher 	$out3,$out3,v25
15247bded2dbSJung-uk Kim	vcipher 	$out4,$out4,v25
15257bded2dbSJung-uk Kim	vcipher 	$out5,$out5,v25
15267bded2dbSJung-uk Kim	vcipher 	$out6,$out6,v25
15277bded2dbSJung-uk Kim	vcipher 	$out7,$out7,v25
15287bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
15297bded2dbSJung-uk Kim	bdnz		Loop_ctr32_enc8x
15307bded2dbSJung-uk Kim
# Remaining rounds for the current batch (last two stack keys in v24/v25,
# then the resident keys v26..v30), interleaved with input loading and
# the length bookkeeping.  With len counted in bytes:
#   r11 = len-256, and r0 = len-256 if that borrowed, else 0
#         (subic / subfe / and);
#   len is reduced by 128, done as -129 then +1 so that the carry reflects
#         the borrow of len-129; the later "subfe." turns that borrow into
#         the condition tested by the bne to Lctr32_enc8x_break;
#   inp advances by 0x80 plus r0, so when fewer than 256 bytes were left
#         the following eight-block load ends at the end of the input.
# key_ is rewound and v24/v25 are reloaded with the first two stack keys
# for the next batch.  Each loaded input block is xored with v31 (the last
# round key) so that a single vcipherlast yields input xor keystream.
15317bded2dbSJung-uk Kim	subic		r11,$len,256		# $len-256, borrow $key_
15327bded2dbSJung-uk Kim	vcipher 	$out0,$out0,v24
15337bded2dbSJung-uk Kim	vcipher 	$out1,$out1,v24
15347bded2dbSJung-uk Kim	vcipher 	$out2,$out2,v24
15357bded2dbSJung-uk Kim	vcipher 	$out3,$out3,v24
15367bded2dbSJung-uk Kim	vcipher 	$out4,$out4,v24
15377bded2dbSJung-uk Kim	vcipher 	$out5,$out5,v24
15387bded2dbSJung-uk Kim	vcipher 	$out6,$out6,v24
15397bded2dbSJung-uk Kim	vcipher 	$out7,$out7,v24
15407bded2dbSJung-uk Kim
15417bded2dbSJung-uk Kim	subfe		r0,r0,r0		# borrow?-1:0
15427bded2dbSJung-uk Kim	vcipher 	$out0,$out0,v25
15437bded2dbSJung-uk Kim	vcipher 	$out1,$out1,v25
15447bded2dbSJung-uk Kim	vcipher 	$out2,$out2,v25
15457bded2dbSJung-uk Kim	vcipher 	$out3,$out3,v25
15467bded2dbSJung-uk Kim	vcipher 	$out4,$out4,v25
15477bded2dbSJung-uk Kim	vcipher		$out5,$out5,v25
15487bded2dbSJung-uk Kim	vcipher		$out6,$out6,v25
15497bded2dbSJung-uk Kim	vcipher		$out7,$out7,v25
15507bded2dbSJung-uk Kim
15517bded2dbSJung-uk Kim	and		r0,r0,r11
15527bded2dbSJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
15537bded2dbSJung-uk Kim	vcipher		$out0,$out0,v26
15547bded2dbSJung-uk Kim	vcipher		$out1,$out1,v26
15557bded2dbSJung-uk Kim	vcipher		$out2,$out2,v26
15567bded2dbSJung-uk Kim	vcipher		$out3,$out3,v26
15577bded2dbSJung-uk Kim	vcipher		$out4,$out4,v26
15587bded2dbSJung-uk Kim	vcipher		$out5,$out5,v26
15597bded2dbSJung-uk Kim	vcipher		$out6,$out6,v26
15607bded2dbSJung-uk Kim	vcipher		$out7,$out7,v26
15617bded2dbSJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
15627bded2dbSJung-uk Kim
15637bded2dbSJung-uk Kim	subic		$len,$len,129		# $len-=129
15647bded2dbSJung-uk Kim	vcipher		$out0,$out0,v27
15657bded2dbSJung-uk Kim	addi		$len,$len,1		# $len-=128 really
15667bded2dbSJung-uk Kim	vcipher		$out1,$out1,v27
15677bded2dbSJung-uk Kim	vcipher		$out2,$out2,v27
15687bded2dbSJung-uk Kim	vcipher		$out3,$out3,v27
15697bded2dbSJung-uk Kim	vcipher		$out4,$out4,v27
15707bded2dbSJung-uk Kim	vcipher		$out5,$out5,v27
15717bded2dbSJung-uk Kim	vcipher		$out6,$out6,v27
15727bded2dbSJung-uk Kim	vcipher		$out7,$out7,v27
15737bded2dbSJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
15747bded2dbSJung-uk Kim
15757bded2dbSJung-uk Kim	vcipher		$out0,$out0,v28
15767bded2dbSJung-uk Kim	 lvx_u		$in0,$x00,$inp		# load input
15777bded2dbSJung-uk Kim	vcipher		$out1,$out1,v28
15787bded2dbSJung-uk Kim	 lvx_u		$in1,$x10,$inp
15797bded2dbSJung-uk Kim	vcipher		$out2,$out2,v28
15807bded2dbSJung-uk Kim	 lvx_u		$in2,$x20,$inp
15817bded2dbSJung-uk Kim	vcipher		$out3,$out3,v28
15827bded2dbSJung-uk Kim	 lvx_u		$in3,$x30,$inp
15837bded2dbSJung-uk Kim	vcipher		$out4,$out4,v28
15847bded2dbSJung-uk Kim	 lvx_u		$in4,$x40,$inp
15857bded2dbSJung-uk Kim	vcipher		$out5,$out5,v28
15867bded2dbSJung-uk Kim	 lvx_u		$in5,$x50,$inp
15877bded2dbSJung-uk Kim	vcipher		$out6,$out6,v28
15887bded2dbSJung-uk Kim	 lvx_u		$in6,$x60,$inp
15897bded2dbSJung-uk Kim	vcipher		$out7,$out7,v28
15907bded2dbSJung-uk Kim	 lvx_u		$in7,$x70,$inp
15917bded2dbSJung-uk Kim	 addi		$inp,$inp,0x80
15927bded2dbSJung-uk Kim
15937bded2dbSJung-uk Kim	vcipher		$out0,$out0,v29
15947bded2dbSJung-uk Kim	 le?vperm	$in0,$in0,$in0,$inpperm
15957bded2dbSJung-uk Kim	vcipher		$out1,$out1,v29
15967bded2dbSJung-uk Kim	 le?vperm	$in1,$in1,$in1,$inpperm
15977bded2dbSJung-uk Kim	vcipher		$out2,$out2,v29
15987bded2dbSJung-uk Kim	 le?vperm	$in2,$in2,$in2,$inpperm
15997bded2dbSJung-uk Kim	vcipher		$out3,$out3,v29
16007bded2dbSJung-uk Kim	 le?vperm	$in3,$in3,$in3,$inpperm
16017bded2dbSJung-uk Kim	vcipher		$out4,$out4,v29
16027bded2dbSJung-uk Kim	 le?vperm	$in4,$in4,$in4,$inpperm
16037bded2dbSJung-uk Kim	vcipher		$out5,$out5,v29
16047bded2dbSJung-uk Kim	 le?vperm	$in5,$in5,$in5,$inpperm
16057bded2dbSJung-uk Kim	vcipher		$out6,$out6,v29
16067bded2dbSJung-uk Kim	 le?vperm	$in6,$in6,$in6,$inpperm
16077bded2dbSJung-uk Kim	vcipher		$out7,$out7,v29
16087bded2dbSJung-uk Kim	 le?vperm	$in7,$in7,$in7,$inpperm
16097bded2dbSJung-uk Kim
16107bded2dbSJung-uk Kim	add		$inp,$inp,r0		# $inp is adjusted in such
16117bded2dbSJung-uk Kim						# way that at exit from the
16127bded2dbSJung-uk Kim						# loop inX-in7 are loaded
16137bded2dbSJung-uk Kim						# with last "words"
16147bded2dbSJung-uk Kim	subfe.		r0,r0,r0		# borrow?-1:0
16157bded2dbSJung-uk Kim	vcipher		$out0,$out0,v30
16167bded2dbSJung-uk Kim	 vxor		$in0,$in0,v31		# xor with last round key
16177bded2dbSJung-uk Kim	vcipher		$out1,$out1,v30
16187bded2dbSJung-uk Kim	 vxor		$in1,$in1,v31
16197bded2dbSJung-uk Kim	vcipher		$out2,$out2,v30
16207bded2dbSJung-uk Kim	 vxor		$in2,$in2,v31
16217bded2dbSJung-uk Kim	vcipher		$out3,$out3,v30
16227bded2dbSJung-uk Kim	 vxor		$in3,$in3,v31
16237bded2dbSJung-uk Kim	vcipher		$out4,$out4,v30
16247bded2dbSJung-uk Kim	 vxor		$in4,$in4,v31
16257bded2dbSJung-uk Kim	vcipher		$out5,$out5,v30
16267bded2dbSJung-uk Kim	 vxor		$in5,$in5,v31
16277bded2dbSJung-uk Kim	vcipher		$out6,$out6,v30
16287bded2dbSJung-uk Kim	 vxor		$in6,$in6,v31
16297bded2dbSJung-uk Kim	vcipher		$out7,$out7,v30
16307bded2dbSJung-uk Kim	 vxor		$in7,$in7,v31
16317bded2dbSJung-uk Kim
16327bded2dbSJung-uk Kim	bne		Lctr32_enc8x_break	# did $len-129 borrow?
16337bded2dbSJung-uk Kim
# Not the last batch.  The final round turns in0..in7 into the eight
# output blocks, which are stored at out (out then advances by 0x80).
# Interleaved with that, the next eight counter blocks are derived from
# ivec (ivec again advancing by eight increments), xored with round key 0,
# and given their first round with v24.  CTR is reloaded with the round
# count and control re-enters the round loop at Loop_ctr32_enc8x_middle.
16347bded2dbSJung-uk Kim	vcipherlast	$in0,$out0,$in0
16357bded2dbSJung-uk Kim	vcipherlast	$in1,$out1,$in1
16367bded2dbSJung-uk Kim	 vadduwm	$out1,$ivec,$one	# counter values ...
16377bded2dbSJung-uk Kim	vcipherlast	$in2,$out2,$in2
16387bded2dbSJung-uk Kim	 vadduwm	$out2,$ivec,$two
16397bded2dbSJung-uk Kim	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
16407bded2dbSJung-uk Kim	vcipherlast	$in3,$out3,$in3
16417bded2dbSJung-uk Kim	 vadduwm	$out3,$out1,$two
16427bded2dbSJung-uk Kim	 vxor		$out1,$out1,$rndkey0
16437bded2dbSJung-uk Kim	vcipherlast	$in4,$out4,$in4
16447bded2dbSJung-uk Kim	 vadduwm	$out4,$out2,$two
16457bded2dbSJung-uk Kim	 vxor		$out2,$out2,$rndkey0
16467bded2dbSJung-uk Kim	vcipherlast	$in5,$out5,$in5
16477bded2dbSJung-uk Kim	 vadduwm	$out5,$out3,$two
16487bded2dbSJung-uk Kim	 vxor		$out3,$out3,$rndkey0
16497bded2dbSJung-uk Kim	vcipherlast	$in6,$out6,$in6
16507bded2dbSJung-uk Kim	 vadduwm	$out6,$out4,$two
16517bded2dbSJung-uk Kim	 vxor		$out4,$out4,$rndkey0
16527bded2dbSJung-uk Kim	vcipherlast	$in7,$out7,$in7
16537bded2dbSJung-uk Kim	 vadduwm	$out7,$out5,$two
16547bded2dbSJung-uk Kim	 vxor		$out5,$out5,$rndkey0
16557bded2dbSJung-uk Kim	le?vperm	$in0,$in0,$in0,$inpperm
16567bded2dbSJung-uk Kim	 vadduwm	$ivec,$out6,$two	# next counter value
16577bded2dbSJung-uk Kim	 vxor		$out6,$out6,$rndkey0
16587bded2dbSJung-uk Kim	le?vperm	$in1,$in1,$in1,$inpperm
16597bded2dbSJung-uk Kim	 vxor		$out7,$out7,$rndkey0
16607bded2dbSJung-uk Kim	mtctr		$rounds
16617bded2dbSJung-uk Kim
16627bded2dbSJung-uk Kim	 vcipher	$out0,$out0,v24
16637bded2dbSJung-uk Kim	stvx_u		$in0,$x00,$out
16647bded2dbSJung-uk Kim	le?vperm	$in2,$in2,$in2,$inpperm
16657bded2dbSJung-uk Kim	 vcipher	$out1,$out1,v24
16667bded2dbSJung-uk Kim	stvx_u		$in1,$x10,$out
16677bded2dbSJung-uk Kim	le?vperm	$in3,$in3,$in3,$inpperm
16687bded2dbSJung-uk Kim	 vcipher	$out2,$out2,v24
16697bded2dbSJung-uk Kim	stvx_u		$in2,$x20,$out
16707bded2dbSJung-uk Kim	le?vperm	$in4,$in4,$in4,$inpperm
16717bded2dbSJung-uk Kim	 vcipher	$out3,$out3,v24
16727bded2dbSJung-uk Kim	stvx_u		$in3,$x30,$out
16737bded2dbSJung-uk Kim	le?vperm	$in5,$in5,$in5,$inpperm
16747bded2dbSJung-uk Kim	 vcipher	$out4,$out4,v24
16757bded2dbSJung-uk Kim	stvx_u		$in4,$x40,$out
16767bded2dbSJung-uk Kim	le?vperm	$in6,$in6,$in6,$inpperm
16777bded2dbSJung-uk Kim	 vcipher	$out5,$out5,v24
16787bded2dbSJung-uk Kim	stvx_u		$in5,$x50,$out
16797bded2dbSJung-uk Kim	le?vperm	$in7,$in7,$in7,$inpperm
16807bded2dbSJung-uk Kim	 vcipher	$out6,$out6,v24
16817bded2dbSJung-uk Kim	stvx_u		$in6,$x60,$out
16827bded2dbSJung-uk Kim	 vcipher	$out7,$out7,v24
16837bded2dbSJung-uk Kim	stvx_u		$in7,$x70,$out
16847bded2dbSJung-uk Kim	addi		$out,$out,0x80
16857bded2dbSJung-uk Kim
16867bded2dbSJung-uk Kim	b		Loop_ctr32_enc8x_middle
16877bded2dbSJung-uk Kim
# Last batch.  len now holds (bytes that were left) minus 128, so it is a
# multiple of 16 between -0x70 and 0.  Three-way compares against -0x60,
# -0x40, -0x20 and 0 select the handler for one to eight remaining
# blocks; falling through means all eight.  Because inp was pulled back
# earlier, the data for the k remaining blocks sits in the highest k of
# in0..in7, which is why each handler pairs out0 with a later inN.
16887bded2dbSJung-uk Kim.align	5
16897bded2dbSJung-uk KimLctr32_enc8x_break:
16907bded2dbSJung-uk Kim	cmpwi		$len,-0x60
16917bded2dbSJung-uk Kim	blt		Lctr32_enc8x_one
16927bded2dbSJung-uk Kim	nop
16937bded2dbSJung-uk Kim	beq		Lctr32_enc8x_two
16947bded2dbSJung-uk Kim	cmpwi		$len,-0x40
16957bded2dbSJung-uk Kim	blt		Lctr32_enc8x_three
16967bded2dbSJung-uk Kim	nop
16977bded2dbSJung-uk Kim	beq		Lctr32_enc8x_four
16987bded2dbSJung-uk Kim	cmpwi		$len,-0x20
16997bded2dbSJung-uk Kim	blt		Lctr32_enc8x_five
17007bded2dbSJung-uk Kim	nop
17017bded2dbSJung-uk Kim	beq		Lctr32_enc8x_six
17027bded2dbSJung-uk Kim	cmpwi		$len,0x00
17037bded2dbSJung-uk Kim	blt		Lctr32_enc8x_seven
17047bded2dbSJung-uk Kim
# Eight blocks left: out0..out7 are finished against in0..in7, stored at
# offsets 0x00..0x70 from out, and out advances by 0x80.
17057bded2dbSJung-uk KimLctr32_enc8x_eight:
17067bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in0
17077bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in1
17087bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in2
17097bded2dbSJung-uk Kim	vcipherlast	$out3,$out3,$in3
17107bded2dbSJung-uk Kim	vcipherlast	$out4,$out4,$in4
17117bded2dbSJung-uk Kim	vcipherlast	$out5,$out5,$in5
17127bded2dbSJung-uk Kim	vcipherlast	$out6,$out6,$in6
17137bded2dbSJung-uk Kim	vcipherlast	$out7,$out7,$in7
17147bded2dbSJung-uk Kim
17157bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
17167bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
17177bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
17187bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
17197bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
17207bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
17217bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
17227bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
17237bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
17247bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
17257bded2dbSJung-uk Kim	stvx_u		$out4,$x40,$out
17267bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
17277bded2dbSJung-uk Kim	stvx_u		$out5,$x50,$out
17287bded2dbSJung-uk Kim	le?vperm	$out7,$out7,$out7,$inpperm
17297bded2dbSJung-uk Kim	stvx_u		$out6,$x60,$out
17307bded2dbSJung-uk Kim	stvx_u		$out7,$x70,$out
17317bded2dbSJung-uk Kim	addi		$out,$out,0x80
17327bded2dbSJung-uk Kim	b		Lctr32_enc8x_done
17337bded2dbSJung-uk Kim
# Seven blocks left: out0..out6 are finished against in1..in7 and stored;
# out advances by 0x70.
17347bded2dbSJung-uk Kim.align	5
17357bded2dbSJung-uk KimLctr32_enc8x_seven:
17367bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in1
17377bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in2
17387bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in3
17397bded2dbSJung-uk Kim	vcipherlast	$out3,$out3,$in4
17407bded2dbSJung-uk Kim	vcipherlast	$out4,$out4,$in5
17417bded2dbSJung-uk Kim	vcipherlast	$out5,$out5,$in6
17427bded2dbSJung-uk Kim	vcipherlast	$out6,$out6,$in7
17437bded2dbSJung-uk Kim
17447bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
17457bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
17467bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
17477bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
17487bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
17497bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
17507bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
17517bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
17527bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
17537bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
17547bded2dbSJung-uk Kim	stvx_u		$out4,$x40,$out
17557bded2dbSJung-uk Kim	le?vperm	$out6,$out6,$out6,$inpperm
17567bded2dbSJung-uk Kim	stvx_u		$out5,$x50,$out
17577bded2dbSJung-uk Kim	stvx_u		$out6,$x60,$out
17587bded2dbSJung-uk Kim	addi		$out,$out,0x70
17597bded2dbSJung-uk Kim	b		Lctr32_enc8x_done
17607bded2dbSJung-uk Kim
# Six blocks left: out0..out5 are finished against in2..in7 and stored;
# out advances by 0x60.
17617bded2dbSJung-uk Kim.align	5
17627bded2dbSJung-uk KimLctr32_enc8x_six:
17637bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in2
17647bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in3
17657bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in4
17667bded2dbSJung-uk Kim	vcipherlast	$out3,$out3,$in5
17677bded2dbSJung-uk Kim	vcipherlast	$out4,$out4,$in6
17687bded2dbSJung-uk Kim	vcipherlast	$out5,$out5,$in7
17697bded2dbSJung-uk Kim
17707bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
17717bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
17727bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
17737bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
17747bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
17757bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
17767bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
17777bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
17787bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
17797bded2dbSJung-uk Kim	le?vperm	$out5,$out5,$out5,$inpperm
17807bded2dbSJung-uk Kim	stvx_u		$out4,$x40,$out
17817bded2dbSJung-uk Kim	stvx_u		$out5,$x50,$out
17827bded2dbSJung-uk Kim	addi		$out,$out,0x60
17837bded2dbSJung-uk Kim	b		Lctr32_enc8x_done
17847bded2dbSJung-uk Kim
# Five blocks left: out0..out4 are finished against in3..in7 and stored;
# out advances by 0x50.
17857bded2dbSJung-uk Kim.align	5
17867bded2dbSJung-uk KimLctr32_enc8x_five:
17877bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in3
17887bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in4
17897bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in5
17907bded2dbSJung-uk Kim	vcipherlast	$out3,$out3,$in6
17917bded2dbSJung-uk Kim	vcipherlast	$out4,$out4,$in7
17927bded2dbSJung-uk Kim
17937bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
17947bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
17957bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
17967bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
17977bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
17987bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
17997bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
18007bded2dbSJung-uk Kim	le?vperm	$out4,$out4,$out4,$inpperm
18017bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
18027bded2dbSJung-uk Kim	stvx_u		$out4,$x40,$out
18037bded2dbSJung-uk Kim	addi		$out,$out,0x50
18047bded2dbSJung-uk Kim	b		Lctr32_enc8x_done
18057bded2dbSJung-uk Kim
# Four blocks left: out0..out3 are finished against in4..in7 and stored;
# out advances by 0x40.
18067bded2dbSJung-uk Kim.align	5
18077bded2dbSJung-uk KimLctr32_enc8x_four:
18087bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in4
18097bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in5
18107bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in6
18117bded2dbSJung-uk Kim	vcipherlast	$out3,$out3,$in7
18127bded2dbSJung-uk Kim
18137bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
18147bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
18157bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
18167bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
18177bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
18187bded2dbSJung-uk Kim	le?vperm	$out3,$out3,$out3,$inpperm
18197bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
18207bded2dbSJung-uk Kim	stvx_u		$out3,$x30,$out
18217bded2dbSJung-uk Kim	addi		$out,$out,0x40
18227bded2dbSJung-uk Kim	b		Lctr32_enc8x_done
18237bded2dbSJung-uk Kim
# Three blocks left: out0..out2 are finished against in5..in7 and stored;
# out advances by 0x30.
18247bded2dbSJung-uk Kim.align	5
18257bded2dbSJung-uk KimLctr32_enc8x_three:
18267bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in5
18277bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in6
18287bded2dbSJung-uk Kim	vcipherlast	$out2,$out2,$in7
18297bded2dbSJung-uk Kim
18307bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
18317bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
18327bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
18337bded2dbSJung-uk Kim	le?vperm	$out2,$out2,$out2,$inpperm
18347bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
18357bded2dbSJung-uk Kim	stvx_u		$out2,$x20,$out
18367bded2dbSJung-uk Kim	addi		$out,$out,0x30
1837610a21fdSJung-uk Kim	b		Lctr32_enc8x_done
18387bded2dbSJung-uk Kim
# Two blocks left: out0 and out1 are finished against in6 and in7 and
# stored; out advances by 0x20.
18397bded2dbSJung-uk Kim.align	5
18407bded2dbSJung-uk KimLctr32_enc8x_two:
18417bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in6
18427bded2dbSJung-uk Kim	vcipherlast	$out1,$out1,$in7
18437bded2dbSJung-uk Kim
18447bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
18457bded2dbSJung-uk Kim	le?vperm	$out1,$out1,$out1,$inpperm
18467bded2dbSJung-uk Kim	stvx_u		$out0,$x00,$out
18477bded2dbSJung-uk Kim	stvx_u		$out1,$x10,$out
18487bded2dbSJung-uk Kim	addi		$out,$out,0x20
1849610a21fdSJung-uk Kim	b		Lctr32_enc8x_done
18507bded2dbSJung-uk Kim
# One block left: out0 is finished against in7 and stored; out advances
# by 0x10 and control falls through to the common exit.
18517bded2dbSJung-uk Kim.align	5
18527bded2dbSJung-uk KimLctr32_enc8x_one:
18537bded2dbSJung-uk Kim	vcipherlast	$out0,$out0,$in7
18547bded2dbSJung-uk Kim
18557bded2dbSJung-uk Kim	le?vperm	$out0,$out0,$out0,$inpperm
18567bded2dbSJung-uk Kim	stvx_u		$out0,0,$out
18577bded2dbSJung-uk Kim	addi		$out,$out,0x10
18587bded2dbSJung-uk Kim
# Common exit.  The eight 16-byte stack slots that held round-key copies
# are overwritten with the contents of inpperm.  r10/r11 keep stepping
# through the frame, so after the wipe they already address the saved
# v20/v21 slots.  Then vrsave, v20-v31 and r26-r31 are restored, the frame
# is popped and the routine returns.
18597bded2dbSJung-uk KimLctr32_enc8x_done:
18607bded2dbSJung-uk Kim	li		r10,`$FRAME+15`
18617bded2dbSJung-uk Kim	li		r11,`$FRAME+31`
18627bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp	# wipe copies of round keys
18637bded2dbSJung-uk Kim	addi		r10,r10,32
18647bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
18657bded2dbSJung-uk Kim	addi		r11,r11,32
18667bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
18677bded2dbSJung-uk Kim	addi		r10,r10,32
18687bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
18697bded2dbSJung-uk Kim	addi		r11,r11,32
18707bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
18717bded2dbSJung-uk Kim	addi		r10,r10,32
18727bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
18737bded2dbSJung-uk Kim	addi		r11,r11,32
18747bded2dbSJung-uk Kim	stvx		$inpperm,r10,$sp
18757bded2dbSJung-uk Kim	addi		r10,r10,32
18767bded2dbSJung-uk Kim	stvx		$inpperm,r11,$sp
18777bded2dbSJung-uk Kim	addi		r11,r11,32
18787bded2dbSJung-uk Kim
18797bded2dbSJung-uk Kim	mtspr		256,$vrsave
18807bded2dbSJung-uk Kim	lvx		v20,r10,$sp		# ABI says so
18817bded2dbSJung-uk Kim	addi		r10,r10,32
18827bded2dbSJung-uk Kim	lvx		v21,r11,$sp
18837bded2dbSJung-uk Kim	addi		r11,r11,32
18847bded2dbSJung-uk Kim	lvx		v22,r10,$sp
18857bded2dbSJung-uk Kim	addi		r10,r10,32
18867bded2dbSJung-uk Kim	lvx		v23,r11,$sp
18877bded2dbSJung-uk Kim	addi		r11,r11,32
18887bded2dbSJung-uk Kim	lvx		v24,r10,$sp
18897bded2dbSJung-uk Kim	addi		r10,r10,32
18907bded2dbSJung-uk Kim	lvx		v25,r11,$sp
18917bded2dbSJung-uk Kim	addi		r11,r11,32
18927bded2dbSJung-uk Kim	lvx		v26,r10,$sp
18937bded2dbSJung-uk Kim	addi		r10,r10,32
18947bded2dbSJung-uk Kim	lvx		v27,r11,$sp
18957bded2dbSJung-uk Kim	addi		r11,r11,32
18967bded2dbSJung-uk Kim	lvx		v28,r10,$sp
18977bded2dbSJung-uk Kim	addi		r10,r10,32
18987bded2dbSJung-uk Kim	lvx		v29,r11,$sp
18997bded2dbSJung-uk Kim	addi		r11,r11,32
19007bded2dbSJung-uk Kim	lvx		v30,r10,$sp
19017bded2dbSJung-uk Kim	lvx		v31,r11,$sp
19027bded2dbSJung-uk Kim	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
19037bded2dbSJung-uk Kim	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
19047bded2dbSJung-uk Kim	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
19057bded2dbSJung-uk Kim	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
19067bded2dbSJung-uk Kim	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
19077bded2dbSJung-uk Kim	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
19087bded2dbSJung-uk Kim	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
19097bded2dbSJung-uk Kim	blr
19107bded2dbSJung-uk Kim	.long		0
19117bded2dbSJung-uk Kim	.byte		0,12,0x04,0,0x80,6,6,0
19127bded2dbSJung-uk Kim	.long		0
19137bded2dbSJung-uk Kim.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
19147bded2dbSJung-uk Kim___
19157bded2dbSJung-uk Kim}}	}}}
19167bded2dbSJung-uk Kim
1917e71b7053SJung-uk Kim#########################################################################
1918e71b7053SJung-uk Kim{{{	# XTS procedures						#
1919e71b7053SJung-uk Kim# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
1920e71b7053SJung-uk Kim#                             const AES_KEY *key1, const AES_KEY *key2,	#
1921e71b7053SJung-uk Kim#                             [const] unsigned char iv[16]);		#
1922e71b7053SJung-uk Kim# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
1923e71b7053SJung-uk Kim# input tweak value is assumed to be encrypted already, and last tweak	#
1924e71b7053SJung-uk Kim# value, one suitable for consecutive call on same chunk of data, is	#
1925e71b7053SJung-uk Kim# written back to original buffer. In addition, in "tweak chaining"	#
1926e71b7053SJung-uk Kim# mode only complete input blocks are processed.			#
1927e71b7053SJung-uk Kim
1928e71b7053SJung-uk Kimmy ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
1929e71b7053SJung-uk Kimmy ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
1930e71b7053SJung-uk Kimmy ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
1931e71b7053SJung-uk Kimmy ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
1932e71b7053SJung-uk Kimmy $taillen = $key2;
1933e71b7053SJung-uk Kim
1934e71b7053SJung-uk Kim   ($inp,$idx) = ($idx,$inp);				# reassign
1935e71b7053SJung-uk Kim
1936e71b7053SJung-uk Kim$code.=<<___;
#
# XTS encrypt entry point (exported as .PREFIX_xts_encrypt).
# Operands used below: inp (arrives in r3), out, len, key1, key2, ivp.
# NOTE(review): the register aliases for these operands are declared
# above this section and are not visible here -- confirm against them.
#
# Flow:
#  - r3 is preset to -1 and returned immediately when len < 16; it is
#    set to 0 on normal completion;
#  - key2 non-NULL: the IV at ivp is encrypted with key2 to form the
#    initial tweak, and ivp is zeroed so no tweak is written back;
#  - key2 NULL: the IV is used as the tweak unchanged, len is rounded
#    down to a multiple of 16, and the next tweak is stored to ivp at exit;
#  - len >= 96 branches to _aesp8_xts_encrypt6x;
#  - otherwise one block per pass through Loop_xts_enc, followed by a
#    ciphertext-stealing pass when a partial block remains.
#
1937e71b7053SJung-uk Kim.globl	.${prefix}_xts_encrypt
1938e71b7053SJung-uk Kim.align	5
1939e71b7053SJung-uk Kim.${prefix}_xts_encrypt:
1940e71b7053SJung-uk Kim	mr		$inp,r3				# reassign
	# preset the failure result and return early when len < 16
1941e71b7053SJung-uk Kim	li		r3,-1
1942e71b7053SJung-uk Kim	${UCMP}i	$len,16
1943e71b7053SJung-uk Kim	bltlr-
1944e71b7053SJung-uk Kim
	# new vrsave mask 0xfff00000 (top 12 bits, i.e. v0-v11); the old
	# value is parked in r12 and restored at Lxts_enc_ret
1945e71b7053SJung-uk Kim	lis		r0,0xfff0
1946e71b7053SJung-uk Kim	mfspr		r12,256				# save vrsave
1947e71b7053SJung-uk Kim	li		r11,0
1948e71b7053SJung-uk Kim	mtspr		256,r0
1949e71b7053SJung-uk Kim
	# little-endian builds only: leperm = lvsl(0) xor 0x07.. is the
	# byte permutation applied before unaligned vector stores, and
	# tmp = 0x0f.. is xor-ed into the alignment permutes below
1950e71b7053SJung-uk Kim	vspltisb	$seven,0x07			# 0x070707..07
1951e71b7053SJung-uk Kim	le?lvsl		$leperm,r11,r11
1952e71b7053SJung-uk Kim	le?vspltisb	$tmp,0x0f
1953e71b7053SJung-uk Kim	le?vxor		$leperm,$leperm,$seven
1954e71b7053SJung-uk Kim
	# fetch the 16-byte IV from a possibly unaligned address: two
	# aligned loads (offsets 0 and 15) merged with vperm
1955e71b7053SJung-uk Kim	li		$idx,15
1956e71b7053SJung-uk Kim	lvx		$tweak,0,$ivp			# load [unaligned] iv
1957e71b7053SJung-uk Kim	lvsl		$inpperm,0,$ivp
1958e71b7053SJung-uk Kim	lvx		$inptail,$idx,$ivp
1959e71b7053SJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
1960e71b7053SJung-uk Kim	vperm		$tweak,$tweak,$inptail,$inpperm
1961e71b7053SJung-uk Kim
	# start the input stream: first aligned quadword plus the
	# permute control derived from the input address
1962e71b7053SJung-uk Kim	neg		r11,$inp
1963e71b7053SJung-uk Kim	lvsr		$inpperm,0,r11			# prepare for unaligned load
1964e71b7053SJung-uk Kim	lvx		$inout,0,$inp
1965e71b7053SJung-uk Kim	addi		$inp,$inp,15			# 15 is not typo
1966e71b7053SJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
1967e71b7053SJung-uk Kim
1968e71b7053SJung-uk Kim	${UCMP}i	$key2,0				# key2==NULL?
1969e71b7053SJung-uk Kim	beq		Lxts_enc_no_key2
1970e71b7053SJung-uk Kim
	# key2 present: encrypt the IV with key2 to obtain the tweak.
	# The round count is read from offset 240 of the key schedule;
	# ctr = rounds/2 - 1, each loop pass performs two rounds and the
	# last two rounds follow the loop.
1971e71b7053SJung-uk Kim	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
1972e71b7053SJung-uk Kim	lwz		$rounds,240($key2)
1973e71b7053SJung-uk Kim	srwi		$rounds,$rounds,1
1974e71b7053SJung-uk Kim	subi		$rounds,$rounds,1
1975e71b7053SJung-uk Kim	li		$idx,16
1976e71b7053SJung-uk Kim
1977e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key2
1978e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
1979e71b7053SJung-uk Kim	addi		$idx,$idx,16
1980e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
1981e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$rndkey0
1982e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key2
1983e71b7053SJung-uk Kim	addi		$idx,$idx,16
1984e71b7053SJung-uk Kim	mtctr		$rounds
1985e71b7053SJung-uk Kim
1986e71b7053SJung-uk KimLtweak_xts_enc:
1987e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
1988e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey1
1989e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
1990e71b7053SJung-uk Kim	addi		$idx,$idx,16
1991e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
1992e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey0
1993e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key2
1994e71b7053SJung-uk Kim	addi		$idx,$idx,16
1995e71b7053SJung-uk Kim	bdnz		Ltweak_xts_enc
1996e71b7053SJung-uk Kim
1997e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
1998e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey1
1999e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
2000e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2001e71b7053SJung-uk Kim	vcipherlast	$tweak,$tweak,$rndkey0
2002e71b7053SJung-uk Kim
	# ivp = 0 makes Lxts_enc_done skip the tweak write-back
2003e71b7053SJung-uk Kim	li		$ivp,0				# don't chain the tweak
2004e71b7053SJung-uk Kim	b		Lxts_enc
2005e71b7053SJung-uk Kim
2006e71b7053SJung-uk KimLxts_enc_no_key2:
	# len &= -16, i.e. drop any trailing partial block
2007e71b7053SJung-uk Kim	li		$idx,-16
2008e71b7053SJung-uk Kim	and		$len,$len,$idx			# in "tweak chaining"
2009e71b7053SJung-uk Kim							# mode only complete
2010e71b7053SJung-uk Kim							# blocks are processed
2011e71b7053SJung-uk KimLxts_enc:
2012e71b7053SJung-uk Kim	lvx		$inptail,0,$inp
2013e71b7053SJung-uk Kim	addi		$inp,$inp,16
2014e71b7053SJung-uk Kim
	# data key (key1): same ctr = rounds/2 - 1 convention as above
2015e71b7053SJung-uk Kim	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
2016e71b7053SJung-uk Kim	lwz		$rounds,240($key1)
2017e71b7053SJung-uk Kim	srwi		$rounds,$rounds,1
2018e71b7053SJung-uk Kim	subi		$rounds,$rounds,1
2019e71b7053SJung-uk Kim	li		$idx,16
2020e71b7053SJung-uk Kim
	# build the tweak-update mask: first byte 0x87, all others 0x01
2021e71b7053SJung-uk Kim	vslb		$eighty7,$seven,$seven		# 0x808080..80
2022e71b7053SJung-uk Kim	vor		$eighty7,$eighty7,$seven	# 0x878787..87
2023e71b7053SJung-uk Kim	vspltisb	$tmp,1				# 0x010101..01
2024e71b7053SJung-uk Kim	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
2025e71b7053SJung-uk Kim
	# 96 bytes or more go to the six-blocks-at-a-time routine
2026e71b7053SJung-uk Kim	${UCMP}i	$len,96
2027e71b7053SJung-uk Kim	bge		_aesp8_xts_encrypt6x
2028e71b7053SJung-uk Kim
	# taillen = (len mod 16) - 16; r0 = all ones when len < 32, else
	# zero (borrow from subic materialised by subfe).  When fewer
	# than two full blocks remain, inp is moved back by
	# 16 - (len mod 16) so the next aligned-load pair ends on the
	# last input byte.
2029e71b7053SJung-uk Kim	andi.		$taillen,$len,15
2030e71b7053SJung-uk Kim	subic		r0,$len,32
2031e71b7053SJung-uk Kim	subi		$taillen,$taillen,16
2032e71b7053SJung-uk Kim	subfe		r0,r0,r0
2033e71b7053SJung-uk Kim	and		r0,r0,$taillen
2034e71b7053SJung-uk Kim	add		$inp,$inp,r0
2035e71b7053SJung-uk Kim
	# first block: align the input, then xor in the tweak and
	# round key 0
2036e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key1
2037e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2038e71b7053SJung-uk Kim	addi		$idx,$idx,16
2039e71b7053SJung-uk Kim	vperm		$inout,$inout,$inptail,$inpperm
2040e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2041e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak
2042e71b7053SJung-uk Kim	vxor		$inout,$inout,$rndkey0
2043e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2044e71b7053SJung-uk Kim	addi		$idx,$idx,16
2045e71b7053SJung-uk Kim	mtctr		$rounds
2046e71b7053SJung-uk Kim	b		Loop_xts_enc
2047e71b7053SJung-uk Kim
2048e71b7053SJung-uk Kim.align	5
	# one block per trip: two AES rounds per loop pass
2049e71b7053SJung-uk KimLoop_xts_enc:
2050e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2051e71b7053SJung-uk Kim	vcipher		$inout,$inout,$rndkey1
2052e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2053e71b7053SJung-uk Kim	addi		$idx,$idx,16
2054e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2055e71b7053SJung-uk Kim	vcipher		$inout,$inout,$rndkey0
2056e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2057e71b7053SJung-uk Kim	addi		$idx,$idx,16
2058e71b7053SJung-uk Kim	bdnz		Loop_xts_enc
2059e71b7053SJung-uk Kim
	# the tweak is folded into the last round key, so vcipherlast
	# yields the final ciphertext block directly
2060e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2061e71b7053SJung-uk Kim	vcipher		$inout,$inout,$rndkey1
2062e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2063e71b7053SJung-uk Kim	li		$idx,16
2064e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2065e71b7053SJung-uk Kim	vxor		$rndkey0,$rndkey0,$tweak
2066e71b7053SJung-uk Kim	vcipherlast	$output,$inout,$rndkey0
2067e71b7053SJung-uk Kim
	# store the block (byte-permuted first on little-endian)
2068e71b7053SJung-uk Kim	le?vperm	$tmp,$output,$output,$leperm
2069e71b7053SJung-uk Kim	be?nop
2070e71b7053SJung-uk Kim	le?stvx_u	$tmp,0,$out
2071e71b7053SJung-uk Kim	be?stvx_u	$output,0,$out
2072e71b7053SJung-uk Kim	addi		$out,$out,16
2073e71b7053SJung-uk Kim
2074e71b7053SJung-uk Kim	subic.		$len,$len,16
2075e71b7053SJung-uk Kim	beq		Lxts_enc_done
2076e71b7053SJung-uk Kim
	# set up the next block: advance the input stream and reload
	# the first two round keys
2077e71b7053SJung-uk Kim	vmr		$inout,$inptail
2078e71b7053SJung-uk Kim	lvx		$inptail,0,$inp
2079e71b7053SJung-uk Kim	addi		$inp,$inp,16
2080e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key1
2081e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2082e71b7053SJung-uk Kim	addi		$idx,$idx,16
2083e71b7053SJung-uk Kim
	# same len < 32 adjustment of inp as before the loop
2084e71b7053SJung-uk Kim	subic		r0,$len,32
2085e71b7053SJung-uk Kim	subfe		r0,r0,r0
2086e71b7053SJung-uk Kim	and		r0,r0,$taillen
2087e71b7053SJung-uk Kim	add		$inp,$inp,r0
2088e71b7053SJung-uk Kim
	# tweak update: every byte is doubled, the carry out of each
	# byte is moved into the following byte, and the carry out of
	# the last byte is folded into the first byte as 0x87
2089e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven		# next tweak value
2090e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2091e71b7053SJung-uk Kim	vsldoi		$tmp,$tmp,$tmp,15
2092e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2093e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$tmp
2094e71b7053SJung-uk Kim
	# the previous ciphertext block is also xor-ed with round key 0
	# here so it can be merged into the input if stealing follows
2095e71b7053SJung-uk Kim	vperm		$inout,$inout,$inptail,$inpperm
2096e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2097e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak
2098e71b7053SJung-uk Kim	vxor		$output,$output,$rndkey0	# just in case $len<16
2099e71b7053SJung-uk Kim	vxor		$inout,$inout,$rndkey0
2100e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2101e71b7053SJung-uk Kim	addi		$idx,$idx,16
2102e71b7053SJung-uk Kim
2103e71b7053SJung-uk Kim	mtctr		$rounds
2104e71b7053SJung-uk Kim	${UCMP}i	$len,16
2105e71b7053SJung-uk Kim	bge		Loop_xts_enc
2106e71b7053SJung-uk Kim
	# ciphertext stealing, 0 < len < 16: build a mask whose first
	# len bytes are zero and the rest all ones, then take the first
	# len bytes from the new partial input and the remaining bytes
	# from the previous ciphertext block
2107e71b7053SJung-uk Kim	vxor		$output,$output,$tweak
2108e71b7053SJung-uk Kim	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
2109e71b7053SJung-uk Kim	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2110e71b7053SJung-uk Kim	vspltisb	$tmp,-1
2111e71b7053SJung-uk Kim	vperm		$inptail,$inptail,$tmp,$inpperm
2112e71b7053SJung-uk Kim	vsel		$inout,$inout,$output,$inptail
2113e71b7053SJung-uk Kim
	# copy the first len bytes of the block just written (at out-16)
	# to out, where they become the final partial block; out is
	# stepped back so the extra pass overwrites the previous block,
	# and len = 16 makes that pass the last one
2114e71b7053SJung-uk Kim	subi		r11,$out,17
2115e71b7053SJung-uk Kim	subi		$out,$out,16
2116e71b7053SJung-uk Kim	mtctr		$len
2117e71b7053SJung-uk Kim	li		$len,16
2118e71b7053SJung-uk KimLoop_xts_enc_steal:
2119e71b7053SJung-uk Kim	lbzu		r0,1(r11)
2120e71b7053SJung-uk Kim	stb		r0,16(r11)
2121e71b7053SJung-uk Kim	bdnz		Loop_xts_enc_steal
2122e71b7053SJung-uk Kim
2123e71b7053SJung-uk Kim	mtctr		$rounds
2124e71b7053SJung-uk Kim	b		Loop_xts_enc			# one more time...
2125e71b7053SJung-uk Kim
2126e71b7053SJung-uk KimLxts_enc_done:
	# ivp is non-zero only in the key2 == NULL (tweak chaining)
	# case: advance the tweak once more and store it back to ivp
2127e71b7053SJung-uk Kim	${UCMP}i	$ivp,0
2128e71b7053SJung-uk Kim	beq		Lxts_enc_ret
2129e71b7053SJung-uk Kim
2130e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven		# next tweak value
2131e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2132e71b7053SJung-uk Kim	vsldoi		$tmp,$tmp,$tmp,15
2133e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2134e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$tmp
2135e71b7053SJung-uk Kim
2136e71b7053SJung-uk Kim	le?vperm	$tweak,$tweak,$tweak,$leperm
2137e71b7053SJung-uk Kim	stvx_u		$tweak,0,$ivp
2138e71b7053SJung-uk Kim
2139e71b7053SJung-uk KimLxts_enc_ret:
2140e71b7053SJung-uk Kim	mtspr		256,r12				# restore vrsave
2141e71b7053SJung-uk Kim	li		r3,0
2142e71b7053SJung-uk Kim	blr
	# NOTE(review): the three data directives after blr look like a
	# traceback table -- confirm against the other routines in this file
2143e71b7053SJung-uk Kim	.long		0
2144e71b7053SJung-uk Kim	.byte		0,12,0x04,0,0x80,6,6,0
2145e71b7053SJung-uk Kim	.long		0
2146e71b7053SJung-uk Kim.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2147e71b7053SJung-uk Kim
#
# XTS decrypt entry point (exported as .PREFIX_xts_decrypt).
# Operands used below: inp (arrives in r3), out, len, key1, key2, ivp.
# NOTE(review): the register aliases for these operands are declared
# above this section and are not visible here -- confirm against them.
#
# Flow:
#  - r3 is preset to -1 and returned immediately when len < 16; it is
#    set to 0 on normal completion;
#  - when len is not a multiple of 16, one full block is held back
#    (len -= 16) so the last two blocks can be handled by the
#    stealing tail;
#  - key2 non-NULL: the IV is run through the forward cipher (vcipher)
#    with key2 to form the tweak, and ivp is zeroed;
#  - key2 NULL: the IV is the tweak, len is rounded up to a multiple of
#    16 after the adjustment above, and the next tweak is stored to ivp;
#  - len >= 96 branches to _aesp8_xts_decrypt6x;
#  - otherwise one block per pass through Loop_xts_dec; Ltail_xts_dec
#    handles a trailing partial block, decrypting the last full block
#    with the later tweak (tweak1) and the merged final block with the
#    earlier one.
#
2148e71b7053SJung-uk Kim.globl	.${prefix}_xts_decrypt
2149e71b7053SJung-uk Kim.align	5
2150e71b7053SJung-uk Kim.${prefix}_xts_decrypt:
2151e71b7053SJung-uk Kim	mr		$inp,r3				# reassign
	# preset the failure result and return early when len < 16
2152e71b7053SJung-uk Kim	li		r3,-1
2153e71b7053SJung-uk Kim	${UCMP}i	$len,16
2154e71b7053SJung-uk Kim	bltlr-
2155e71b7053SJung-uk Kim
	# new vrsave mask 0xfff80000 (top 13 bits, i.e. v0-v12): one
	# register more than the encrypt routine because tweak1 is used
2156e71b7053SJung-uk Kim	lis		r0,0xfff8
2157e71b7053SJung-uk Kim	mfspr		r12,256				# save vrsave
2158e71b7053SJung-uk Kim	li		r11,0
2159e71b7053SJung-uk Kim	mtspr		256,r0
2160e71b7053SJung-uk Kim
	# r0 = 16 when len mod 16 is non-zero, else 0 (bit 4 of the
	# negated remainder); len -= r0 holds one full block back
2161e71b7053SJung-uk Kim	andi.		r0,$len,15
2162e71b7053SJung-uk Kim	neg		r0,r0
2163e71b7053SJung-uk Kim	andi.		r0,r0,16
2164e71b7053SJung-uk Kim	sub		$len,$len,r0
2165e71b7053SJung-uk Kim
	# little-endian builds only: leperm = lvsl(0) xor 0x07.. is the
	# byte permutation applied before unaligned vector stores, and
	# tmp = 0x0f.. is xor-ed into the alignment permutes below
2166e71b7053SJung-uk Kim	vspltisb	$seven,0x07			# 0x070707..07
2167e71b7053SJung-uk Kim	le?lvsl		$leperm,r11,r11
2168e71b7053SJung-uk Kim	le?vspltisb	$tmp,0x0f
2169e71b7053SJung-uk Kim	le?vxor		$leperm,$leperm,$seven
2170e71b7053SJung-uk Kim
	# fetch the 16-byte IV from a possibly unaligned address: two
	# aligned loads (offsets 0 and 15) merged with vperm
2171e71b7053SJung-uk Kim	li		$idx,15
2172e71b7053SJung-uk Kim	lvx		$tweak,0,$ivp			# load [unaligned] iv
2173e71b7053SJung-uk Kim	lvsl		$inpperm,0,$ivp
2174e71b7053SJung-uk Kim	lvx		$inptail,$idx,$ivp
2175e71b7053SJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
2176e71b7053SJung-uk Kim	vperm		$tweak,$tweak,$inptail,$inpperm
2177e71b7053SJung-uk Kim
	# start the input stream: first aligned quadword plus the
	# permute control derived from the input address
2178e71b7053SJung-uk Kim	neg		r11,$inp
2179e71b7053SJung-uk Kim	lvsr		$inpperm,0,r11			# prepare for unaligned load
2180e71b7053SJung-uk Kim	lvx		$inout,0,$inp
2181e71b7053SJung-uk Kim	addi		$inp,$inp,15			# 15 is not typo
2182e71b7053SJung-uk Kim	le?vxor		$inpperm,$inpperm,$tmp
2183e71b7053SJung-uk Kim
2184e71b7053SJung-uk Kim	${UCMP}i	$key2,0				# key2==NULL?
2185e71b7053SJung-uk Kim	beq		Lxts_dec_no_key2
2186e71b7053SJung-uk Kim
	# key2 present: the tweak is produced with the forward cipher
	# (vcipher), also on the decrypt side.  The round count is read
	# from offset 240 of the key schedule; ctr = rounds/2 - 1, each
	# loop pass performs two rounds and the last two follow the loop.
2187e71b7053SJung-uk Kim	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
2188e71b7053SJung-uk Kim	lwz		$rounds,240($key2)
2189e71b7053SJung-uk Kim	srwi		$rounds,$rounds,1
2190e71b7053SJung-uk Kim	subi		$rounds,$rounds,1
2191e71b7053SJung-uk Kim	li		$idx,16
2192e71b7053SJung-uk Kim
2193e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key2
2194e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
2195e71b7053SJung-uk Kim	addi		$idx,$idx,16
2196e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2197e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$rndkey0
2198e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key2
2199e71b7053SJung-uk Kim	addi		$idx,$idx,16
2200e71b7053SJung-uk Kim	mtctr		$rounds
2201e71b7053SJung-uk Kim
2202e71b7053SJung-uk KimLtweak_xts_dec:
2203e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2204e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey1
2205e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
2206e71b7053SJung-uk Kim	addi		$idx,$idx,16
2207e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2208e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey0
2209e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key2
2210e71b7053SJung-uk Kim	addi		$idx,$idx,16
2211e71b7053SJung-uk Kim	bdnz		Ltweak_xts_dec
2212e71b7053SJung-uk Kim
2213e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2214e71b7053SJung-uk Kim	vcipher		$tweak,$tweak,$rndkey1
2215e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key2
2216e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2217e71b7053SJung-uk Kim	vcipherlast	$tweak,$tweak,$rndkey0
2218e71b7053SJung-uk Kim
	# ivp = 0 makes Lxts_dec_done skip the tweak write-back
2219e71b7053SJung-uk Kim	li		$ivp,0				# don't chain the tweak
2220e71b7053SJung-uk Kim	b		Lxts_dec
2221e71b7053SJung-uk Kim
2222e71b7053SJung-uk KimLxts_dec_no_key2:
	# len += (-len) mod 16, i.e. round len up to a multiple of 16
2223e71b7053SJung-uk Kim	neg		$idx,$len
2224e71b7053SJung-uk Kim	andi.		$idx,$idx,15
2225e71b7053SJung-uk Kim	add		$len,$len,$idx			# in "tweak chaining"
2226e71b7053SJung-uk Kim							# mode only complete
2227e71b7053SJung-uk Kim							# blocks are processed
2228e71b7053SJung-uk KimLxts_dec:
2229e71b7053SJung-uk Kim	lvx		$inptail,0,$inp
2230e71b7053SJung-uk Kim	addi		$inp,$inp,16
2231e71b7053SJung-uk Kim
	# data key (key1): same ctr = rounds/2 - 1 convention as above
2232e71b7053SJung-uk Kim	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
2233e71b7053SJung-uk Kim	lwz		$rounds,240($key1)
2234e71b7053SJung-uk Kim	srwi		$rounds,$rounds,1
2235e71b7053SJung-uk Kim	subi		$rounds,$rounds,1
2236e71b7053SJung-uk Kim	li		$idx,16
2237e71b7053SJung-uk Kim
	# build the tweak-update mask: first byte 0x87, all others 0x01
2238e71b7053SJung-uk Kim	vslb		$eighty7,$seven,$seven		# 0x808080..80
2239e71b7053SJung-uk Kim	vor		$eighty7,$eighty7,$seven	# 0x878787..87
2240e71b7053SJung-uk Kim	vspltisb	$tmp,1				# 0x010101..01
2241e71b7053SJung-uk Kim	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
2242e71b7053SJung-uk Kim
	# 96 bytes or more go to the six-blocks-at-a-time routine
2243e71b7053SJung-uk Kim	${UCMP}i	$len,96
2244e71b7053SJung-uk Kim	bge		_aesp8_xts_decrypt6x
2245e71b7053SJung-uk Kim
	# first block: align the input, then xor in the tweak and
	# round key 0
2246e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key1
2247e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2248e71b7053SJung-uk Kim	addi		$idx,$idx,16
2249e71b7053SJung-uk Kim	vperm		$inout,$inout,$inptail,$inpperm
2250e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2251e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak
2252e71b7053SJung-uk Kim	vxor		$inout,$inout,$rndkey0
2253e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2254e71b7053SJung-uk Kim	addi		$idx,$idx,16
2255e71b7053SJung-uk Kim	mtctr		$rounds
2256e71b7053SJung-uk Kim
	# len < 16 here means the original length was 17..31: only the
	# held-back block and the partial block remain, so go to the tail
2257e71b7053SJung-uk Kim	${UCMP}i	$len,16
2258e71b7053SJung-uk Kim	blt		Ltail_xts_dec
2259e71b7053SJung-uk Kim	be?b		Loop_xts_dec
2260e71b7053SJung-uk Kim
2261e71b7053SJung-uk Kim.align	5
	# one block per trip: two inverse AES rounds per loop pass
2262e71b7053SJung-uk KimLoop_xts_dec:
2263e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2264e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey1
2265e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2266e71b7053SJung-uk Kim	addi		$idx,$idx,16
2267e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2268e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey0
2269e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2270e71b7053SJung-uk Kim	addi		$idx,$idx,16
2271e71b7053SJung-uk Kim	bdnz		Loop_xts_dec
2272e71b7053SJung-uk Kim
	# the tweak is folded into the last round key, so vncipherlast
	# yields the final plaintext block directly
2273e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2274e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey1
2275e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2276e71b7053SJung-uk Kim	li		$idx,16
2277e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2278e71b7053SJung-uk Kim	vxor		$rndkey0,$rndkey0,$tweak
2279e71b7053SJung-uk Kim	vncipherlast	$output,$inout,$rndkey0
2280e71b7053SJung-uk Kim
	# store the block (byte-permuted first on little-endian)
2281e71b7053SJung-uk Kim	le?vperm	$tmp,$output,$output,$leperm
2282e71b7053SJung-uk Kim	be?nop
2283e71b7053SJung-uk Kim	le?stvx_u	$tmp,0,$out
2284e71b7053SJung-uk Kim	be?stvx_u	$output,0,$out
2285e71b7053SJung-uk Kim	addi		$out,$out,16
2286e71b7053SJung-uk Kim
2287e71b7053SJung-uk Kim	subic.		$len,$len,16
2288e71b7053SJung-uk Kim	beq		Lxts_dec_done
2289e71b7053SJung-uk Kim
	# set up the next block: advance the input stream and reload
	# the first two round keys
2290e71b7053SJung-uk Kim	vmr		$inout,$inptail
2291e71b7053SJung-uk Kim	lvx		$inptail,0,$inp
2292e71b7053SJung-uk Kim	addi		$inp,$inp,16
2293e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key1
2294e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2295e71b7053SJung-uk Kim	addi		$idx,$idx,16
2296e71b7053SJung-uk Kim
	# tweak update: every byte is doubled, the carry out of each
	# byte is moved into the following byte, and the carry out of
	# the last byte is folded into the first byte as 0x87
2297e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven		# next tweak value
2298e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2299e71b7053SJung-uk Kim	vsldoi		$tmp,$tmp,$tmp,15
2300e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2301e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$tmp
2302e71b7053SJung-uk Kim
2303e71b7053SJung-uk Kim	vperm		$inout,$inout,$inptail,$inpperm
2304e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2305e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak
2306e71b7053SJung-uk Kim	vxor		$inout,$inout,$rndkey0
2307e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2308e71b7053SJung-uk Kim	addi		$idx,$idx,16
2309e71b7053SJung-uk Kim
2310e71b7053SJung-uk Kim	mtctr		$rounds
2311e71b7053SJung-uk Kim	${UCMP}i	$len,16
2312e71b7053SJung-uk Kim	bge		Loop_xts_dec
2313e71b7053SJung-uk Kim
2314e71b7053SJung-uk KimLtail_xts_dec:
	# stealing tail: tweak1 = the tweak after the current one,
	# computed without disturbing tweak itself
2315e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven		# next tweak value
2316e71b7053SJung-uk Kim	vaddubm		$tweak1,$tweak,$tweak
2317e71b7053SJung-uk Kim	vsldoi		$tmp,$tmp,$tmp,15
2318e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2319e71b7053SJung-uk Kim	vxor		$tweak1,$tweak1,$tmp
2320e71b7053SJung-uk Kim
	# reposition inp (back 16, forward len) for the load of the
	# trailing partial block further down
2321e71b7053SJung-uk Kim	subi		$inp,$inp,16
2322e71b7053SJung-uk Kim	add		$inp,$inp,$len
2323e71b7053SJung-uk Kim
	# inout already carries tweak; swap it for tweak1
2324e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak		# :-(
2325e71b7053SJung-uk Kim	vxor		$inout,$inout,$tweak1		# :-)
2326e71b7053SJung-uk Kim
	# decrypt the last full block under tweak1
2327e71b7053SJung-uk KimLoop_xts_dec_short:
2328e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2329e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey1
2330e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2331e71b7053SJung-uk Kim	addi		$idx,$idx,16
2332e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2333e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey0
2334e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2335e71b7053SJung-uk Kim	addi		$idx,$idx,16
2336e71b7053SJung-uk Kim	bdnz		Loop_xts_dec_short
2337e71b7053SJung-uk Kim
2338e71b7053SJung-uk Kim	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2339e71b7053SJung-uk Kim	vncipher	$inout,$inout,$rndkey1
2340e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2341e71b7053SJung-uk Kim	li		$idx,16
2342e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2343e71b7053SJung-uk Kim	vxor		$rndkey0,$rndkey0,$tweak1
2344e71b7053SJung-uk Kim	vncipherlast	$output,$inout,$rndkey0
2345e71b7053SJung-uk Kim
	# store at out without advancing it: the first len bytes are
	# copied forward below and the whole block is overwritten by the
	# extra pass through Loop_xts_dec
2346e71b7053SJung-uk Kim	le?vperm	$tmp,$output,$output,$leperm
2347e71b7053SJung-uk Kim	be?nop
2348e71b7053SJung-uk Kim	le?stvx_u	$tmp,0,$out
2349e71b7053SJung-uk Kim	be?stvx_u	$output,0,$out
2350e71b7053SJung-uk Kim
	# load and align the trailing partial input block
2351e71b7053SJung-uk Kim	vmr		$inout,$inptail
2352e71b7053SJung-uk Kim	lvx		$inptail,0,$inp
2353e71b7053SJung-uk Kim	#addi		$inp,$inp,16
2354e71b7053SJung-uk Kim	lvx		$rndkey0,0,$key1
2355e71b7053SJung-uk Kim	lvx		$rndkey1,$idx,$key1
2356e71b7053SJung-uk Kim	addi		$idx,$idx,16
2357e71b7053SJung-uk Kim	vperm		$inout,$inout,$inptail,$inpperm
2358e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2359e71b7053SJung-uk Kim
	# mask with the first len bytes zero and the rest all ones:
	# keep the first len bytes of the partial input and fill the
	# remainder from the block just decrypted
2360e71b7053SJung-uk Kim	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
2361e71b7053SJung-uk Kim	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2362e71b7053SJung-uk Kim	vspltisb	$tmp,-1
2363e71b7053SJung-uk Kim	vperm		$inptail,$inptail,$tmp,$inpperm
2364e71b7053SJung-uk Kim	vsel		$inout,$inout,$output,$inptail
2365e71b7053SJung-uk Kim
	# the merged block is decrypted under the earlier tweak
2366e71b7053SJung-uk Kim	vxor		$rndkey0,$rndkey0,$tweak
2367e71b7053SJung-uk Kim	vxor		$inout,$inout,$rndkey0
2368e71b7053SJung-uk Kim	lvx		$rndkey0,$idx,$key1
2369e71b7053SJung-uk Kim	addi		$idx,$idx,16
2370e71b7053SJung-uk Kim
	# copy len bytes from out to out+16 (the final partial block);
	# len = 16 makes the extra pass the last one
2371e71b7053SJung-uk Kim	subi		r11,$out,1
2372e71b7053SJung-uk Kim	mtctr		$len
2373e71b7053SJung-uk Kim	li		$len,16
2374e71b7053SJung-uk KimLoop_xts_dec_steal:
2375e71b7053SJung-uk Kim	lbzu		r0,1(r11)
2376e71b7053SJung-uk Kim	stb		r0,16(r11)
2377e71b7053SJung-uk Kim	bdnz		Loop_xts_dec_steal
2378e71b7053SJung-uk Kim
2379e71b7053SJung-uk Kim	mtctr		$rounds
2380e71b7053SJung-uk Kim	b		Loop_xts_dec			# one more time...
2381e71b7053SJung-uk Kim
2382e71b7053SJung-uk KimLxts_dec_done:
	# ivp is non-zero only in the key2 == NULL (tweak chaining)
	# case: advance the tweak once more and store it back to ivp
2383e71b7053SJung-uk Kim	${UCMP}i	$ivp,0
2384e71b7053SJung-uk Kim	beq		Lxts_dec_ret
2385e71b7053SJung-uk Kim
2386e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven		# next tweak value
2387e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2388e71b7053SJung-uk Kim	vsldoi		$tmp,$tmp,$tmp,15
2389e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2390e71b7053SJung-uk Kim	vxor		$tweak,$tweak,$tmp
2391e71b7053SJung-uk Kim
2392e71b7053SJung-uk Kim	le?vperm	$tweak,$tweak,$tweak,$leperm
2393e71b7053SJung-uk Kim	stvx_u		$tweak,0,$ivp
2394e71b7053SJung-uk Kim
2395e71b7053SJung-uk KimLxts_dec_ret:
2396e71b7053SJung-uk Kim	mtspr		256,r12				# restore vrsave
2397e71b7053SJung-uk Kim	li		r3,0
2398e71b7053SJung-uk Kim	blr
	# NOTE(review): the three data directives after blr look like a
	# traceback table -- confirm against the other routines in this file
2399e71b7053SJung-uk Kim	.long		0
2400e71b7053SJung-uk Kim	.byte		0,12,0x04,0,0x80,6,6,0
2401e71b7053SJung-uk Kim	.long		0
2402e71b7053SJung-uk Kim.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2403e71b7053SJung-uk Kim___
2404e71b7053SJung-uk Kim#########################################################################
2405e71b7053SJung-uk Kim{{	# Optimized XTS procedures					#
# Register aliases for the 6x-interleaved XTS routines that follow.
# $key_ reuses $key2's register; the 6x encrypt code below points it at
# the on-stack copy of the round keys ($sp + $FRAME + 15).
2406e71b7053SJung-uk Kimmy $key_=$key2;
# $x00..$x70 are r0, r3 and r26..r31; the 6x prologue loads $x10..$x70
# with the byte offsets 0x10..0x70 for indexed vector loads and stores.
# On osx flavours $x00 is the literal 0 instead of the name r0.
2407e71b7053SJung-uk Kimmy ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2408e71b7053SJung-uk Kim    $x00=0 if ($flavour =~ /osx/);
# Six input vectors (v0-v5), six working/output vectors (v7, v12-v16) and
# six per-block tweak vectors (v17-v22).
2409e71b7053SJung-uk Kimmy ($in0,  $in1,  $in2,  $in3,  $in4,  $in5)=map("v$_",(0..5));
2410e71b7053SJung-uk Kimmy ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2411e71b7053SJung-uk Kimmy ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
# NOTE(review): "first found keys" below presumably means the early round
# keys that are reloaded from the stack into v24/v25 -- confirm.
2412e71b7053SJung-uk Kimmy $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
2413e71b7053SJung-uk Kim			# v26-v31 last 6 round keys
2414e71b7053SJung-uk Kimmy ($keyperm)=($out0);	# aliases with "caller", redundant assignment
# $taillen shares r31 with $x70 in the 6x code.
2415e71b7053SJung-uk Kimmy $taillen=$x70;
2416e71b7053SJung-uk Kim
2417e71b7053SJung-uk Kim$code.=<<___;
2418e71b7053SJung-uk Kim.align	5
2419e71b7053SJung-uk Kim_aesp8_xts_encrypt6x:
2420e71b7053SJung-uk Kim	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2421e71b7053SJung-uk Kim	mflr		r11
2422e71b7053SJung-uk Kim	li		r7,`$FRAME+8*16+15`
2423e71b7053SJung-uk Kim	li		r3,`$FRAME+8*16+31`
2424e71b7053SJung-uk Kim	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2425e71b7053SJung-uk Kim	stvx		v20,r7,$sp		# ABI says so
2426e71b7053SJung-uk Kim	addi		r7,r7,32
2427e71b7053SJung-uk Kim	stvx		v21,r3,$sp
2428e71b7053SJung-uk Kim	addi		r3,r3,32
2429e71b7053SJung-uk Kim	stvx		v22,r7,$sp
2430e71b7053SJung-uk Kim	addi		r7,r7,32
2431e71b7053SJung-uk Kim	stvx		v23,r3,$sp
2432e71b7053SJung-uk Kim	addi		r3,r3,32
2433e71b7053SJung-uk Kim	stvx		v24,r7,$sp
2434e71b7053SJung-uk Kim	addi		r7,r7,32
2435e71b7053SJung-uk Kim	stvx		v25,r3,$sp
2436e71b7053SJung-uk Kim	addi		r3,r3,32
2437e71b7053SJung-uk Kim	stvx		v26,r7,$sp
2438e71b7053SJung-uk Kim	addi		r7,r7,32
2439e71b7053SJung-uk Kim	stvx		v27,r3,$sp
2440e71b7053SJung-uk Kim	addi		r3,r3,32
2441e71b7053SJung-uk Kim	stvx		v28,r7,$sp
2442e71b7053SJung-uk Kim	addi		r7,r7,32
2443e71b7053SJung-uk Kim	stvx		v29,r3,$sp
2444e71b7053SJung-uk Kim	addi		r3,r3,32
2445e71b7053SJung-uk Kim	stvx		v30,r7,$sp
2446e71b7053SJung-uk Kim	stvx		v31,r3,$sp
2447e71b7053SJung-uk Kim	li		r0,-1
2448e71b7053SJung-uk Kim	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
2449e71b7053SJung-uk Kim	li		$x10,0x10
2450e71b7053SJung-uk Kim	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2451e71b7053SJung-uk Kim	li		$x20,0x20
2452e71b7053SJung-uk Kim	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2453e71b7053SJung-uk Kim	li		$x30,0x30
2454e71b7053SJung-uk Kim	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2455e71b7053SJung-uk Kim	li		$x40,0x40
2456e71b7053SJung-uk Kim	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2457e71b7053SJung-uk Kim	li		$x50,0x50
2458e71b7053SJung-uk Kim	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2459e71b7053SJung-uk Kim	li		$x60,0x60
2460e71b7053SJung-uk Kim	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2461e71b7053SJung-uk Kim	li		$x70,0x70
2462e71b7053SJung-uk Kim	mtspr		256,r0
2463e71b7053SJung-uk Kim
2464*a7148ab3SEnji Cooper	# Reverse eighty7 to 0x010101..87
2465*a7148ab3SEnji Cooper	xxlor		2, 32+$eighty7, 32+$eighty7
2466*a7148ab3SEnji Cooper	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
2467*a7148ab3SEnji Cooper	xxlor		1, 32+$eighty7, 32+$eighty7
2468*a7148ab3SEnji Cooper
2469*a7148ab3SEnji Cooper	# Load XOR contents. 0xf102132435465768798a9bacbdcedfe
2470*a7148ab3SEnji Cooper	mr		$x70, r6
2471*a7148ab3SEnji Cooper	bl		Lconsts
2472*a7148ab3SEnji Cooper	lxvw4x		0, $x40, r6		# load XOR contents
2473*a7148ab3SEnji Cooper	mr		r6, $x70
2474*a7148ab3SEnji Cooper	li		$x70,0x70
2475*a7148ab3SEnji Cooper
2476e71b7053SJung-uk Kim	subi		$rounds,$rounds,3	# -4 in total
2477e71b7053SJung-uk Kim
2478e71b7053SJung-uk Kim	lvx		$rndkey0,$x00,$key1	# load key schedule
2479e71b7053SJung-uk Kim	lvx		v30,$x10,$key1
2480e71b7053SJung-uk Kim	addi		$key1,$key1,0x20
2481e71b7053SJung-uk Kim	lvx		v31,$x00,$key1
2482e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,v30,$keyperm
2483e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15
2484e71b7053SJung-uk Kim	mtctr		$rounds
2485e71b7053SJung-uk Kim
2486e71b7053SJung-uk KimLoad_xts_enc_key:
2487e71b7053SJung-uk Kim	?vperm		v24,v30,v31,$keyperm
2488e71b7053SJung-uk Kim	lvx		v30,$x10,$key1
2489e71b7053SJung-uk Kim	addi		$key1,$key1,0x20
2490e71b7053SJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[1]
2491e71b7053SJung-uk Kim	?vperm		v25,v31,v30,$keyperm
2492e71b7053SJung-uk Kim	lvx		v31,$x00,$key1
2493e71b7053SJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[2]
2494e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
2495e71b7053SJung-uk Kim	bdnz		Load_xts_enc_key
2496e71b7053SJung-uk Kim
2497e71b7053SJung-uk Kim	lvx		v26,$x10,$key1
2498e71b7053SJung-uk Kim	?vperm		v24,v30,v31,$keyperm
2499e71b7053SJung-uk Kim	lvx		v27,$x20,$key1
2500e71b7053SJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[3]
2501e71b7053SJung-uk Kim	?vperm		v25,v31,v26,$keyperm
2502e71b7053SJung-uk Kim	lvx		v28,$x30,$key1
2503e71b7053SJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[4]
2504e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
2505e71b7053SJung-uk Kim	?vperm		v26,v26,v27,$keyperm
2506e71b7053SJung-uk Kim	lvx		v29,$x40,$key1
2507e71b7053SJung-uk Kim	?vperm		v27,v27,v28,$keyperm
2508e71b7053SJung-uk Kim	lvx		v30,$x50,$key1
2509e71b7053SJung-uk Kim	?vperm		v28,v28,v29,$keyperm
2510e71b7053SJung-uk Kim	lvx		v31,$x60,$key1
2511e71b7053SJung-uk Kim	?vperm		v29,v29,v30,$keyperm
2512e71b7053SJung-uk Kim	lvx		$twk5,$x70,$key1	# borrow $twk5
2513e71b7053SJung-uk Kim	?vperm		v30,v30,v31,$keyperm
2514e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# pre-load round[1]
2515e71b7053SJung-uk Kim	?vperm		v31,v31,$twk5,$keyperm
2516e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# pre-load round[2]
2517e71b7053SJung-uk Kim
2518*a7148ab3SEnji Cooper	# Switch to use the following codes with 0x010101..87 to generate tweak.
2519*a7148ab3SEnji Cooper	#     eighty7 = 0x010101..87
2520*a7148ab3SEnji Cooper	# vsrab		tmp, tweak, seven	# next tweak value, right shift 7 bits
2521*a7148ab3SEnji Cooper	# vand		tmp, tmp, eighty7	# last byte with carry
2522*a7148ab3SEnji Cooper	# vaddubm	tweak, tweak, tweak	# left shift 1 bit (x2)
2523*a7148ab3SEnji Cooper	# xxlor		vsx, 0, 0
2524*a7148ab3SEnji Cooper	# vpermxor	tweak, tweak, tmp, vsx
2525*a7148ab3SEnji Cooper
2526e71b7053SJung-uk Kim	 vperm		$in0,$inout,$inptail,$inpperm
2527e71b7053SJung-uk Kim	 subi		$inp,$inp,31		# undo "caller"
2528e71b7053SJung-uk Kim	vxor		$twk0,$tweak,$rndkey0
2529e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2530e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2531e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2532e71b7053SJung-uk Kim	 vxor		$out0,$in0,$twk0
2533*a7148ab3SEnji Cooper	xxlor		32+$in1, 0, 0
2534*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in1
2535e71b7053SJung-uk Kim
2536e71b7053SJung-uk Kim	 lvx_u		$in1,$x10,$inp
2537e71b7053SJung-uk Kim	vxor		$twk1,$tweak,$rndkey0
2538e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2539e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2540e71b7053SJung-uk Kim	 le?vperm	$in1,$in1,$in1,$leperm
2541e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2542e71b7053SJung-uk Kim	 vxor		$out1,$in1,$twk1
2543*a7148ab3SEnji Cooper	xxlor		32+$in2, 0, 0
2544*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in2
2545e71b7053SJung-uk Kim
2546e71b7053SJung-uk Kim	 lvx_u		$in2,$x20,$inp
2547e71b7053SJung-uk Kim	 andi.		$taillen,$len,15
2548e71b7053SJung-uk Kim	vxor		$twk2,$tweak,$rndkey0
2549e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2550e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2551e71b7053SJung-uk Kim	 le?vperm	$in2,$in2,$in2,$leperm
2552e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2553e71b7053SJung-uk Kim	 vxor		$out2,$in2,$twk2
2554*a7148ab3SEnji Cooper	xxlor		32+$in3, 0, 0
2555*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in3
2556e71b7053SJung-uk Kim
2557e71b7053SJung-uk Kim	 lvx_u		$in3,$x30,$inp
2558e71b7053SJung-uk Kim	 sub		$len,$len,$taillen
2559e71b7053SJung-uk Kim	vxor		$twk3,$tweak,$rndkey0
2560e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2561e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2562e71b7053SJung-uk Kim	 le?vperm	$in3,$in3,$in3,$leperm
2563e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2564e71b7053SJung-uk Kim	 vxor		$out3,$in3,$twk3
2565*a7148ab3SEnji Cooper	xxlor		32+$in4, 0, 0
2566*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in4
2567e71b7053SJung-uk Kim
2568e71b7053SJung-uk Kim	 lvx_u		$in4,$x40,$inp
2569e71b7053SJung-uk Kim	 subi		$len,$len,0x60
2570e71b7053SJung-uk Kim	vxor		$twk4,$tweak,$rndkey0
2571e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2572e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2573e71b7053SJung-uk Kim	 le?vperm	$in4,$in4,$in4,$leperm
2574e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2575e71b7053SJung-uk Kim	 vxor		$out4,$in4,$twk4
2576*a7148ab3SEnji Cooper	xxlor		32+$in5, 0, 0
2577*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in5
2578e71b7053SJung-uk Kim
2579e71b7053SJung-uk Kim	 lvx_u		$in5,$x50,$inp
2580e71b7053SJung-uk Kim	 addi		$inp,$inp,0x60
2581e71b7053SJung-uk Kim	vxor		$twk5,$tweak,$rndkey0
2582e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
2583e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
2584e71b7053SJung-uk Kim	 le?vperm	$in5,$in5,$in5,$leperm
2585e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
2586e71b7053SJung-uk Kim	 vxor		$out5,$in5,$twk5
2587*a7148ab3SEnji Cooper	xxlor		32+$in0, 0, 0
2588*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in0
2589e71b7053SJung-uk Kim
2590e71b7053SJung-uk Kim	vxor		v31,v31,$rndkey0
2591e71b7053SJung-uk Kim	mtctr		$rounds
2592e71b7053SJung-uk Kim	b		Loop_xts_enc6x
2593e71b7053SJung-uk Kim
# Main six-block loop of the XTS encrypt path. Each pass through the bdnz
# loop applies two rounds (v24, v25) to all six blocks and reloads the next
# two round keys through $key_. Once CTR runs out, the remaining rounds
# (v24..v30 plus vcipherlast) are interleaved with three other jobs:
# deriving the next six tweaks, loading the next six input blocks, and
# storing the six results.
2594e71b7053SJung-uk Kim.align	5
2595e71b7053SJung-uk KimLoop_xts_enc6x:
2596e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
2597e71b7053SJung-uk Kim	vcipher		$out1,$out1,v24
2598e71b7053SJung-uk Kim	vcipher		$out2,$out2,v24
2599e71b7053SJung-uk Kim	vcipher		$out3,$out3,v24
2600e71b7053SJung-uk Kim	vcipher		$out4,$out4,v24
2601e71b7053SJung-uk Kim	vcipher		$out5,$out5,v24
2602e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
2603e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
2604e71b7053SJung-uk Kim
2605e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
2606e71b7053SJung-uk Kim	vcipher		$out1,$out1,v25
2607e71b7053SJung-uk Kim	vcipher		$out2,$out2,v25
2608e71b7053SJung-uk Kim	vcipher		$out3,$out3,v25
2609e71b7053SJung-uk Kim	vcipher		$out4,$out4,v25
2610e71b7053SJung-uk Kim	vcipher		$out5,$out5,v25
2611e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
2612e71b7053SJung-uk Kim	bdnz		Loop_xts_enc6x
2613e71b7053SJung-uk Kim
	# CTR is exhausted. Copy VSR1 into $eighty7 so the tweak updates below
	# use the byte-reversed mask.
2614*a7148ab3SEnji Cooper	xxlor		32+$eighty7, 1, 1		# 0x010101..87
2615*a7148ab3SEnji Cooper
	# Tweak update pattern, repeated once per block below:
	#   vsrab    $tmp = per-byte arithmetic shift of $tweak by $seven
	#   vaddubm  $tweak = $tweak + $tweak (each byte doubled)
	#   vand     $tmp masked with $eighty7
	#   xxlor    copy VSR0 into a free $inN as the vpermxor control
	#   vpermxor $tweak = $tweak XOR permuted $tmp
	# NOTE(review): presumably the XTS multiply-by-x step, with $seven
	# holding a per-byte shift count of 7 - confirm against the prologue.
2616e71b7053SJung-uk Kim	subic		$len,$len,96		# $len-=96
2617e71b7053SJung-uk Kim	 vxor		$in0,$twk0,v31		# xor with last round key
2618e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
2619e71b7053SJung-uk Kim	vcipher		$out1,$out1,v24
2620e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2621e71b7053SJung-uk Kim	 vxor		$twk0,$tweak,$rndkey0
2622e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2623e71b7053SJung-uk Kim	vcipher		$out2,$out2,v24
2624e71b7053SJung-uk Kim	vcipher		$out3,$out3,v24
2625e71b7053SJung-uk Kim	vcipher		$out4,$out4,v24
2626e71b7053SJung-uk Kim	vcipher		$out5,$out5,v24
2627e71b7053SJung-uk Kim
	# The record form sets CR0 here; the loop-closing beq at the bottom
	# of this block consumes it.
2628e71b7053SJung-uk Kim	subfe.		r0,r0,r0		# borrow?-1:0
2629e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2630e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
2631e71b7053SJung-uk Kim	vcipher		$out1,$out1,v25
2632*a7148ab3SEnji Cooper	 xxlor		32+$in1, 0, 0
2633*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in1
2634e71b7053SJung-uk Kim	vcipher		$out2,$out2,v25
2635e71b7053SJung-uk Kim	vcipher		$out3,$out3,v25
2636e71b7053SJung-uk Kim	 vxor		$in1,$twk1,v31
2637e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2638e71b7053SJung-uk Kim	 vxor		$twk1,$tweak,$rndkey0
2639e71b7053SJung-uk Kim	vcipher		$out4,$out4,v25
2640e71b7053SJung-uk Kim	vcipher		$out5,$out5,v25
2641e71b7053SJung-uk Kim
	# r0 becomes $len (now negative) if the subtraction borrowed, else 0.
2642e71b7053SJung-uk Kim	and		r0,r0,$len
2643e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2644e71b7053SJung-uk Kim	vcipher		$out0,$out0,v26
2645e71b7053SJung-uk Kim	vcipher		$out1,$out1,v26
2646e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2647e71b7053SJung-uk Kim	vcipher		$out2,$out2,v26
2648e71b7053SJung-uk Kim	vcipher		$out3,$out3,v26
2649*a7148ab3SEnji Cooper	 xxlor		32+$in2, 0, 0
2650*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in2
2651e71b7053SJung-uk Kim	vcipher		$out4,$out4,v26
2652e71b7053SJung-uk Kim	vcipher		$out5,$out5,v26
2653e71b7053SJung-uk Kim
2654e71b7053SJung-uk Kim	add		$inp,$inp,r0		# $inp is adjusted in such
2655e71b7053SJung-uk Kim						# way that at exit from the
2656e71b7053SJung-uk Kim						# loop inX-in5 are loaded
2657e71b7053SJung-uk Kim						# with last "words"
2658e71b7053SJung-uk Kim	 vxor		$in2,$twk2,v31
2659e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2660e71b7053SJung-uk Kim	 vxor		$twk2,$tweak,$rndkey0
2661e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2662e71b7053SJung-uk Kim	vcipher		$out0,$out0,v27
2663e71b7053SJung-uk Kim	vcipher		$out1,$out1,v27
2664e71b7053SJung-uk Kim	vcipher		$out2,$out2,v27
2665e71b7053SJung-uk Kim	vcipher		$out3,$out3,v27
2666e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2667e71b7053SJung-uk Kim	vcipher		$out4,$out4,v27
2668e71b7053SJung-uk Kim	vcipher		$out5,$out5,v27
2669e71b7053SJung-uk Kim
2670e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
2671*a7148ab3SEnji Cooper	 xxlor		32+$in3, 0, 0
2672*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in3
2673e71b7053SJung-uk Kim	vcipher		$out0,$out0,v28
2674e71b7053SJung-uk Kim	vcipher		$out1,$out1,v28
2675e71b7053SJung-uk Kim	 vxor		$in3,$twk3,v31
2676e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2677e71b7053SJung-uk Kim	 vxor		$twk3,$tweak,$rndkey0
2678e71b7053SJung-uk Kim	vcipher		$out2,$out2,v28
2679e71b7053SJung-uk Kim	vcipher		$out3,$out3,v28
2680e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2681e71b7053SJung-uk Kim	vcipher		$out4,$out4,v28
2682e71b7053SJung-uk Kim	vcipher		$out5,$out5,v28
2683e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
2684e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2685e71b7053SJung-uk Kim
2686e71b7053SJung-uk Kim	vcipher		$out0,$out0,v29
2687e71b7053SJung-uk Kim	vcipher		$out1,$out1,v29
2688*a7148ab3SEnji Cooper	 xxlor		32+$in4, 0, 0
2689*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in4
2690e71b7053SJung-uk Kim	vcipher		$out2,$out2,v29
2691e71b7053SJung-uk Kim	vcipher		$out3,$out3,v29
2692e71b7053SJung-uk Kim	 vxor		$in4,$twk4,v31
2693e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2694e71b7053SJung-uk Kim	 vxor		$twk4,$tweak,$rndkey0
2695e71b7053SJung-uk Kim	vcipher		$out4,$out4,v29
2696e71b7053SJung-uk Kim	vcipher		$out5,$out5,v29
2697e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
2698e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2699e71b7053SJung-uk Kim
2700e71b7053SJung-uk Kim	vcipher		$out0,$out0,v30
2701e71b7053SJung-uk Kim	vcipher		$out1,$out1,v30
2702e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2703e71b7053SJung-uk Kim	vcipher		$out2,$out2,v30
2704e71b7053SJung-uk Kim	vcipher		$out3,$out3,v30
2705*a7148ab3SEnji Cooper	 xxlor		32+$in5, 0, 0
2706*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in5
2707e71b7053SJung-uk Kim	vcipher		$out4,$out4,v30
2708e71b7053SJung-uk Kim	vcipher		$out5,$out5,v30
2709e71b7053SJung-uk Kim	 vxor		$in5,$twk5,v31
2710e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
2711e71b7053SJung-uk Kim	 vxor		$twk5,$tweak,$rndkey0
2712e71b7053SJung-uk Kim
	# Final round. Each $inN operand holds the matching tweak XORed with
	# v31; once consumed, $inN is reloaded with the next input block.
2713e71b7053SJung-uk Kim	vcipherlast	$out0,$out0,$in0
2714e71b7053SJung-uk Kim	 lvx_u		$in0,$x00,$inp		# load next input block
2715e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
2716e71b7053SJung-uk Kim	vcipherlast	$out1,$out1,$in1
2717e71b7053SJung-uk Kim	 lvx_u		$in1,$x10,$inp
2718e71b7053SJung-uk Kim	vcipherlast	$out2,$out2,$in2
2719e71b7053SJung-uk Kim	 le?vperm	$in0,$in0,$in0,$leperm
2720e71b7053SJung-uk Kim	 lvx_u		$in2,$x20,$inp
2721e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
2722e71b7053SJung-uk Kim	vcipherlast	$out3,$out3,$in3
2723e71b7053SJung-uk Kim	 le?vperm	$in1,$in1,$in1,$leperm
2724e71b7053SJung-uk Kim	 lvx_u		$in3,$x30,$inp
2725e71b7053SJung-uk Kim	vcipherlast	$out4,$out4,$in4
2726e71b7053SJung-uk Kim	 le?vperm	$in2,$in2,$in2,$leperm
2727e71b7053SJung-uk Kim	 lvx_u		$in4,$x40,$inp
	# $in0 already holds the next input block, so it is parked in VSR10
	# while the register is borrowed as the vpermxor control operand.
2728*a7148ab3SEnji Cooper	 xxlor		10, 32+$in0, 32+$in0
2729*a7148ab3SEnji Cooper	 xxlor		32+$in0, 0, 0
2730*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in0
2731*a7148ab3SEnji Cooper	 xxlor		32+$in0, 10, 10
2732e71b7053SJung-uk Kim	vcipherlast	$tmp,$out5,$in5		# last block might be needed
2733e71b7053SJung-uk Kim						# in stealing mode
2734e71b7053SJung-uk Kim	 le?vperm	$in3,$in3,$in3,$leperm
2735e71b7053SJung-uk Kim	 lvx_u		$in5,$x50,$inp
2736e71b7053SJung-uk Kim	 addi		$inp,$inp,0x60
2737e71b7053SJung-uk Kim	 le?vperm	$in4,$in4,$in4,$leperm
2738e71b7053SJung-uk Kim	 le?vperm	$in5,$in5,$in5,$leperm
2739e71b7053SJung-uk Kim
	# Store the six results and pre-whiten the freshly loaded blocks with
	# the new tweaks for the next pass.
2740e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2741e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
2742e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2743e71b7053SJung-uk Kim	 vxor		$out0,$in0,$twk0
2744e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
2745e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
2746e71b7053SJung-uk Kim	 vxor		$out1,$in1,$twk1
2747e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
2748e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
2749e71b7053SJung-uk Kim	 vxor		$out2,$in2,$twk2
2750e71b7053SJung-uk Kim	le?vperm	$out4,$out4,$out4,$leperm
2751e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
2752e71b7053SJung-uk Kim	 vxor		$out3,$in3,$twk3
	# The sixth result lives in $tmp. Little-endian builds store a
	# byte-permuted copy made in $out5; big-endian builds store $tmp
	# directly. $tmp itself stays unpermuted for the stealing path.
2753e71b7053SJung-uk Kim	le?vperm	$out5,$tmp,$tmp,$leperm
2754e71b7053SJung-uk Kim	stvx_u		$out4,$x40,$out
2755e71b7053SJung-uk Kim	 vxor		$out4,$in4,$twk4
2756e71b7053SJung-uk Kim	le?stvx_u	$out5,$x50,$out
2757e71b7053SJung-uk Kim	be?stvx_u	$tmp, $x50,$out
2758e71b7053SJung-uk Kim	 vxor		$out5,$in5,$twk5
2759e71b7053SJung-uk Kim	addi		$out,$out,0x60
2760e71b7053SJung-uk Kim
	# CR0 still holds the subfe. result from above: equal means there
	# was no borrow, so another full six-block pass follows.
2761e71b7053SJung-uk Kim	mtctr		$rounds
2762e71b7053SJung-uk Kim	beq		Loop_xts_enc6x		# did $len-=96 borrow?
2763e71b7053SJung-uk Kim
	# Fewer than six whole blocks remain. VSR2 is copied back into $eighty7,
	# then 0x60 is added back to $len so it once more counts the pending
	# whole-block bytes. The record form sets CR0 for the zero test, and
	# the compares pick the handler:
	#   0 -> zero, below 0x20 -> one, 0x20 -> two, below 0x40 -> three,
	#   0x40 -> four, anything larger falls through to five.
	# NOTE(review): assumes $len is a multiple of 16 at this point - confirm
	# against the prologue.
2764*a7148ab3SEnji Cooper	xxlor		32+$eighty7, 2, 2		# 0x870101..01
2765*a7148ab3SEnji Cooper
2766e71b7053SJung-uk Kim	addic.		$len,$len,0x60
2767e71b7053SJung-uk Kim	beq		Lxts_enc6x_zero
2768e71b7053SJung-uk Kim	cmpwi		$len,0x20
2769e71b7053SJung-uk Kim	blt		Lxts_enc6x_one
2770e71b7053SJung-uk Kim	nop
2771e71b7053SJung-uk Kim	beq		Lxts_enc6x_two
2772e71b7053SJung-uk Kim	cmpwi		$len,0x40
2773e71b7053SJung-uk Kim	blt		Lxts_enc6x_three
2774e71b7053SJung-uk Kim	nop
2775e71b7053SJung-uk Kim	beq		Lxts_enc6x_four
2776e71b7053SJung-uk Kim
# Five whole blocks remain. They are taken from $in1 through $in5, whitened
# with $twk0 through $twk4 and encrypted by _aesp8_xts_enc5x, then stored.
2777e71b7053SJung-uk KimLxts_enc6x_five:
2778e71b7053SJung-uk Kim	vxor		$out0,$in1,$twk0
2779e71b7053SJung-uk Kim	vxor		$out1,$in2,$twk1
2780e71b7053SJung-uk Kim	vxor		$out2,$in3,$twk2
2781e71b7053SJung-uk Kim	vxor		$out3,$in4,$twk3
2782e71b7053SJung-uk Kim	vxor		$out4,$in5,$twk4
2783e71b7053SJung-uk Kim
2784e71b7053SJung-uk Kim	bl		_aesp8_xts_enc5x
2785e71b7053SJung-uk Kim
	# $twk0 now carries the first tweak not consumed here. Both the
	# stealing path and the final tweak write-back read it.
2786e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2787e71b7053SJung-uk Kim	vmr		$twk0,$twk5		# unused tweak
2788e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
2789e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2790e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
2791e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
2792e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
2793e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
2794e71b7053SJung-uk Kim	vxor		$tmp,$out4,$twk5	# last block prep for stealing
2795e71b7053SJung-uk Kim	le?vperm	$out4,$out4,$out4,$leperm
2796e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
2797e71b7053SJung-uk Kim	stvx_u		$out4,$x40,$out
2798e71b7053SJung-uk Kim	addi		$out,$out,0x50
	# CR0 comes from the compare of $taillen with 0 done inside
	# _aesp8_xts_enc5x: a non-zero tail goes to ciphertext stealing.
2799e71b7053SJung-uk Kim	bne		Lxts_enc6x_steal
2800e71b7053SJung-uk Kim	b		Lxts_enc6x_done
2801e71b7053SJung-uk Kim
# Four whole blocks remain. They are taken from $in2 through $in5 and
# whitened with $twk0 through $twk3. The fifth lane of the shared helper is
# zeroed and its result is discarded.
2802e71b7053SJung-uk Kim.align	4
2803e71b7053SJung-uk KimLxts_enc6x_four:
2804e71b7053SJung-uk Kim	vxor		$out0,$in2,$twk0
2805e71b7053SJung-uk Kim	vxor		$out1,$in3,$twk1
2806e71b7053SJung-uk Kim	vxor		$out2,$in4,$twk2
2807e71b7053SJung-uk Kim	vxor		$out3,$in5,$twk3
2808e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
2809e71b7053SJung-uk Kim
2810e71b7053SJung-uk Kim	bl		_aesp8_xts_enc5x
2811e71b7053SJung-uk Kim
2812e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2813e71b7053SJung-uk Kim	vmr		$twk0,$twk4		# unused tweak
2814e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
2815e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2816e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
2817e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
2818e71b7053SJung-uk Kim	vxor		$tmp,$out3,$twk4	# last block prep for stealing
2819e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
2820e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
2821e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
2822e71b7053SJung-uk Kim	addi		$out,$out,0x40
	# CR0 comes from the compare of $taillen with 0 done inside
	# _aesp8_xts_enc5x: a non-zero tail goes to ciphertext stealing.
2823e71b7053SJung-uk Kim	bne		Lxts_enc6x_steal
2824e71b7053SJung-uk Kim	b		Lxts_enc6x_done
2825e71b7053SJung-uk Kim
# Three whole blocks remain. They are taken from $in3 through $in5 and
# whitened with $twk0 through $twk2. The last two lanes of the shared helper
# are zeroed and their results are discarded.
2826e71b7053SJung-uk Kim.align	4
2827e71b7053SJung-uk KimLxts_enc6x_three:
2828e71b7053SJung-uk Kim	vxor		$out0,$in3,$twk0
2829e71b7053SJung-uk Kim	vxor		$out1,$in4,$twk1
2830e71b7053SJung-uk Kim	vxor		$out2,$in5,$twk2
2831e71b7053SJung-uk Kim	vxor		$out3,$out3,$out3
2832e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
2833e71b7053SJung-uk Kim
2834e71b7053SJung-uk Kim	bl		_aesp8_xts_enc5x
2835e71b7053SJung-uk Kim
2836e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2837e71b7053SJung-uk Kim	vmr		$twk0,$twk3		# unused tweak
2838e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
2839e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2840e71b7053SJung-uk Kim	vxor		$tmp,$out2,$twk3	# last block prep for stealing
2841e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
2842e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
2843e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
2844e71b7053SJung-uk Kim	addi		$out,$out,0x30
	# CR0 comes from the compare of $taillen with 0 done inside
	# _aesp8_xts_enc5x: a non-zero tail goes to ciphertext stealing.
2845e71b7053SJung-uk Kim	bne		Lxts_enc6x_steal
2846e71b7053SJung-uk Kim	b		Lxts_enc6x_done
2847e71b7053SJung-uk Kim
# Two whole blocks remain. They are taken from $in4 and $in5 and whitened
# with $twk0 and $twk1. The last three lanes of the shared helper are zeroed
# and their results are discarded.
2848e71b7053SJung-uk Kim.align	4
2849e71b7053SJung-uk KimLxts_enc6x_two:
2850e71b7053SJung-uk Kim	vxor		$out0,$in4,$twk0
2851e71b7053SJung-uk Kim	vxor		$out1,$in5,$twk1
2852e71b7053SJung-uk Kim	vxor		$out2,$out2,$out2
2853e71b7053SJung-uk Kim	vxor		$out3,$out3,$out3
2854e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
2855e71b7053SJung-uk Kim
2856e71b7053SJung-uk Kim	bl		_aesp8_xts_enc5x
2857e71b7053SJung-uk Kim
2858e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2859e71b7053SJung-uk Kim	vmr		$twk0,$twk2		# unused tweak
2860e71b7053SJung-uk Kim	vxor		$tmp,$out1,$twk2	# last block prep for stealing
2861e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
2862e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2863e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
2864e71b7053SJung-uk Kim	addi		$out,$out,0x20
	# CR0 comes from the compare of $taillen with 0 done inside
	# _aesp8_xts_enc5x: a non-zero tail goes to ciphertext stealing.
2865e71b7053SJung-uk Kim	bne		Lxts_enc6x_steal
2866e71b7053SJung-uk Kim	b		Lxts_enc6x_done
2867e71b7053SJung-uk Kim
# One whole block remains, taken from $in5 and whitened with $twk0.
# Loop_xts_enc1x is the single-block cipher. The stealing path also branches
# back to it, with $taillen cleared, to encrypt the merged final block.
2868e71b7053SJung-uk Kim.align	4
2869e71b7053SJung-uk KimLxts_enc6x_one:
2870e71b7053SJung-uk Kim	vxor		$out0,$in5,$twk0
2871e71b7053SJung-uk Kim	nop
2872e71b7053SJung-uk KimLoop_xts_enc1x:
2873e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
2874e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
2875e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
2876e71b7053SJung-uk Kim
2877e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
2878e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
2879e71b7053SJung-uk Kim	bdnz		Loop_xts_enc1x
2880e71b7053SJung-uk Kim
	# While the remaining rounds run, the 16 bytes at $inp+$taillen-16
	# are fetched into $in0 and rotated by $inpperm for a possible
	# stealing step. The compare below sets CR0 for the final bne.
2881e71b7053SJung-uk Kim	add		$inp,$inp,$taillen
2882e71b7053SJung-uk Kim	cmpwi		$taillen,0
2883e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
2884e71b7053SJung-uk Kim
2885e71b7053SJung-uk Kim	subi		$inp,$inp,16
2886e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
2887e71b7053SJung-uk Kim
2888e71b7053SJung-uk Kim	lvsr		$inpperm,0,$taillen
2889e71b7053SJung-uk Kim	vcipher		$out0,$out0,v26
2890e71b7053SJung-uk Kim
2891e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
2892e71b7053SJung-uk Kim	vcipher		$out0,$out0,v27
2893e71b7053SJung-uk Kim
2894e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
2895e71b7053SJung-uk Kim	vcipher		$out0,$out0,v28
2896e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
2897e71b7053SJung-uk Kim
2898e71b7053SJung-uk Kim	vcipher		$out0,$out0,v29
2899e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
	# $twk0 XOR v31 is the final-round operand for vcipherlast below.
2900e71b7053SJung-uk Kim	 vxor		$twk0,$twk0,v31
2901e71b7053SJung-uk Kim
2902e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
2903e71b7053SJung-uk Kim	vcipher		$out0,$out0,v30
2904e71b7053SJung-uk Kim
2905e71b7053SJung-uk Kim	vperm		$in0,$in0,$in0,$inpperm
2906e71b7053SJung-uk Kim	vcipherlast	$out0,$out0,$twk0
2907e71b7053SJung-uk Kim
2908e71b7053SJung-uk Kim	vmr		$twk0,$twk1		# unused tweak
2909e71b7053SJung-uk Kim	vxor		$tmp,$out0,$twk1	# last block prep for stealing
2910e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
2911e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
2912e71b7053SJung-uk Kim	addi		$out,$out,0x10
	# Non-zero $taillen (CR0 from the cmpwi above) goes to stealing.
	# On the re-entry from the stealing path $taillen is 0, so control
	# falls to the done label.
2913e71b7053SJung-uk Kim	bne		Lxts_enc6x_steal
2914e71b7053SJung-uk Kim	b		Lxts_enc6x_done
2915e71b7053SJung-uk Kim
# No whole block remains after the main loop. Without a partial tail there
# is nothing left to do. Otherwise the tail bytes are loaded and $tmp (the
# last block produced by the main loop) is prepared, and control falls
# through into the ciphertext-stealing code.
2916e71b7053SJung-uk Kim.align	4
2917e71b7053SJung-uk KimLxts_enc6x_zero:
2918e71b7053SJung-uk Kim	cmpwi		$taillen,0
2919e71b7053SJung-uk Kim	beq		Lxts_enc6x_done
2920e71b7053SJung-uk Kim
2921e71b7053SJung-uk Kim	add		$inp,$inp,$taillen
2922e71b7053SJung-uk Kim	subi		$inp,$inp,16
2923e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
2924e71b7053SJung-uk Kim	lvsr		$inpperm,0,$taillen	# $in5 is no more
2925e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
2926e71b7053SJung-uk Kim	vperm		$in0,$in0,$in0,$inpperm
2927e71b7053SJung-uk Kim	vxor		$tmp,$tmp,$twk0
	# Ciphertext stealing. $inpperm is applied to an all-zero and an
	# all-ones vector to form a byte-select mask. vsel then merges the
	# tweak-whitened tail in $in0 with the previous block in $tmp, leaving
	# the merged block in $out0.
2928e71b7053SJung-uk KimLxts_enc6x_steal:
2929e71b7053SJung-uk Kim	vxor		$in0,$in0,$twk0
2930e71b7053SJung-uk Kim	vxor		$out0,$out0,$out0
2931e71b7053SJung-uk Kim	vspltisb	$out1,-1
2932e71b7053SJung-uk Kim	vperm		$out0,$out0,$out1,$inpperm
2933e71b7053SJung-uk Kim	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
2934e71b7053SJung-uk Kim
	# $out is stepped back by one block. The byte loop then copies
	# $taillen bytes of the block stored there to the 16 bytes that follow
	# it. r30 starts one byte early because lbzu pre-increments.
2935e71b7053SJung-uk Kim	subi		r30,$out,17
2936e71b7053SJung-uk Kim	subi		$out,$out,16
2937e71b7053SJung-uk Kim	mtctr		$taillen
2938e71b7053SJung-uk KimLoop_xts_enc6x_steal:
2939e71b7053SJung-uk Kim	lbzu		r0,1(r30)
2940e71b7053SJung-uk Kim	stb		r0,16(r30)
2941e71b7053SJung-uk Kim	bdnz		Loop_xts_enc6x_steal
2942e71b7053SJung-uk Kim
	# Clearing $taillen keeps the single-block pass from coming back here.
2943e71b7053SJung-uk Kim	li		$taillen,0
2944e71b7053SJung-uk Kim	mtctr		$rounds
2945e71b7053SJung-uk Kim	b		Loop_xts_enc1x		# one more time...
2946e71b7053SJung-uk Kim
# Common exit of the six-block encrypt path. If $ivp is non-null the next
# tweak is written back through it. Then the stack copies of the round keys
# are wiped, the saved registers are reloaded and the frame is released.
2947e71b7053SJung-uk Kim.align	4
2948e71b7053SJung-uk KimLxts_enc6x_done:
2949e71b7053SJung-uk Kim	${UCMP}i	$ivp,0
2950e71b7053SJung-uk Kim	beq		Lxts_enc6x_ret
2951e71b7053SJung-uk Kim
	# $twk0 carries $rndkey0 folded in, so it is XORed out again before
	# the tweak is stored.
2952e71b7053SJung-uk Kim	vxor		$tweak,$twk0,$rndkey0
2953e71b7053SJung-uk Kim	le?vperm	$tweak,$tweak,$tweak,$leperm
2954e71b7053SJung-uk Kim	stvx_u		$tweak,0,$ivp
2955e71b7053SJung-uk Kim
2956e71b7053SJung-uk KimLxts_enc6x_ret:
	# r11 is moved to LR (presumably the return address saved by the
	# prologue, which is outside this excerpt). r10 and r11 are then
	# reused as two interleaved offsets, each stepping by 32 bytes.
2957e71b7053SJung-uk Kim	mtlr		r11
2958e71b7053SJung-uk Kim	li		r10,`$FRAME+15`
2959e71b7053SJung-uk Kim	li		r11,`$FRAME+31`
	# Eight 16-byte stack slots are overwritten with $seven.
2960e71b7053SJung-uk Kim	stvx		$seven,r10,$sp		# wipe copies of round keys
2961e71b7053SJung-uk Kim	addi		r10,r10,32
2962e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
2963e71b7053SJung-uk Kim	addi		r11,r11,32
2964e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
2965e71b7053SJung-uk Kim	addi		r10,r10,32
2966e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
2967e71b7053SJung-uk Kim	addi		r11,r11,32
2968e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
2969e71b7053SJung-uk Kim	addi		r10,r10,32
2970e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
2971e71b7053SJung-uk Kim	addi		r11,r11,32
2972e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
2973e71b7053SJung-uk Kim	addi		r10,r10,32
2974e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
2975e71b7053SJung-uk Kim	addi		r11,r11,32
2976e71b7053SJung-uk Kim
	# r10 and r11 now point just past the wiped area, where v20 through
	# v31 are reloaded from. SPR 256 is written back from $vrsave.
2977e71b7053SJung-uk Kim	mtspr		256,$vrsave
2978e71b7053SJung-uk Kim	lvx		v20,r10,$sp		# ABI says so
2979e71b7053SJung-uk Kim	addi		r10,r10,32
2980e71b7053SJung-uk Kim	lvx		v21,r11,$sp
2981e71b7053SJung-uk Kim	addi		r11,r11,32
2982e71b7053SJung-uk Kim	lvx		v22,r10,$sp
2983e71b7053SJung-uk Kim	addi		r10,r10,32
2984e71b7053SJung-uk Kim	lvx		v23,r11,$sp
2985e71b7053SJung-uk Kim	addi		r11,r11,32
2986e71b7053SJung-uk Kim	lvx		v24,r10,$sp
2987e71b7053SJung-uk Kim	addi		r10,r10,32
2988e71b7053SJung-uk Kim	lvx		v25,r11,$sp
2989e71b7053SJung-uk Kim	addi		r11,r11,32
2990e71b7053SJung-uk Kim	lvx		v26,r10,$sp
2991e71b7053SJung-uk Kim	addi		r10,r10,32
2992e71b7053SJung-uk Kim	lvx		v27,r11,$sp
2993e71b7053SJung-uk Kim	addi		r11,r11,32
2994e71b7053SJung-uk Kim	lvx		v28,r10,$sp
2995e71b7053SJung-uk Kim	addi		r10,r10,32
2996e71b7053SJung-uk Kim	lvx		v29,r11,$sp
2997e71b7053SJung-uk Kim	addi		r11,r11,32
2998e71b7053SJung-uk Kim	lvx		v30,r10,$sp
2999e71b7053SJung-uk Kim	lvx		v31,r11,$sp
	# Reload r26 through r31 and pop the frame.
3000e71b7053SJung-uk Kim	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3001e71b7053SJung-uk Kim	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3002e71b7053SJung-uk Kim	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3003e71b7053SJung-uk Kim	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3004e71b7053SJung-uk Kim	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3005e71b7053SJung-uk Kim	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3006e71b7053SJung-uk Kim	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3007e71b7053SJung-uk Kim	blr
	# NOTE(review): the words below look like a traceback table - confirm.
3008e71b7053SJung-uk Kim	.long		0
3009e71b7053SJung-uk Kim	.byte		0,12,0x04,1,0x80,6,6,0
3010e71b7053SJung-uk Kim	.long		0
3011e71b7053SJung-uk Kim
# _aesp8_xts_enc5x: five-lane cipher helper used by the tail handlers of the
# six-block encrypt path. It always processes all five lanes. Callers with
# fewer blocks pass zeroed lanes and ignore those results.
#
# In:   $out0 through $out4 hold blocks already XORed with their tweaks;
#       CTR holds the number of two-round loop passes;
#       v24 and v25 hold the first round-key pair, $key_ points at the
#       stack copy of the key schedule;
#       $twk0 through $twk4, v31, $inp and $taillen are read.
# Out:  $out0 through $out4 hold the vcipherlast results;
#       $twk0 has been XORed with v31; $in1 through $in4 are overwritten;
#       $in0 holds the 16 bytes loaded from $inp+$taillen-16, rotated by
#       $inpperm, for a possible stealing step in the caller;
#       $inp has been advanced by $taillen-16;
#       $key_, v24 and v25 are rewound to the first round-key pair;
#       CR0 holds the comparison of $taillen with 0, which callers test
#       with bne right after the call.
3012e71b7053SJung-uk Kim.align	5
3013e71b7053SJung-uk Kim_aesp8_xts_enc5x:
3014e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
3015e71b7053SJung-uk Kim	vcipher		$out1,$out1,v24
3016e71b7053SJung-uk Kim	vcipher		$out2,$out2,v24
3017e71b7053SJung-uk Kim	vcipher		$out3,$out3,v24
3018e71b7053SJung-uk Kim	vcipher		$out4,$out4,v24
3019e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
3020e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3021e71b7053SJung-uk Kim
3022e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
3023e71b7053SJung-uk Kim	vcipher		$out1,$out1,v25
3024e71b7053SJung-uk Kim	vcipher		$out2,$out2,v25
3025e71b7053SJung-uk Kim	vcipher		$out3,$out3,v25
3026e71b7053SJung-uk Kim	vcipher		$out4,$out4,v25
3027e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
3028e71b7053SJung-uk Kim	bdnz		_aesp8_xts_enc5x
3029e71b7053SJung-uk Kim
	# The scalar work for the tail (pointer adjustment, compare, tail
	# load) is slotted between the remaining rounds. Nothing after the
	# cmpwi touches CR0, so its result survives until the caller.
3030e71b7053SJung-uk Kim	add		$inp,$inp,$taillen
3031e71b7053SJung-uk Kim	cmpwi		$taillen,0
3032e71b7053SJung-uk Kim	vcipher		$out0,$out0,v24
3033e71b7053SJung-uk Kim	vcipher		$out1,$out1,v24
3034e71b7053SJung-uk Kim	vcipher		$out2,$out2,v24
3035e71b7053SJung-uk Kim	vcipher		$out3,$out3,v24
3036e71b7053SJung-uk Kim	vcipher		$out4,$out4,v24
3037e71b7053SJung-uk Kim
3038e71b7053SJung-uk Kim	subi		$inp,$inp,16
3039e71b7053SJung-uk Kim	vcipher		$out0,$out0,v25
3040e71b7053SJung-uk Kim	vcipher		$out1,$out1,v25
3041e71b7053SJung-uk Kim	vcipher		$out2,$out2,v25
3042e71b7053SJung-uk Kim	vcipher		$out3,$out3,v25
3043e71b7053SJung-uk Kim	vcipher		$out4,$out4,v25
	# Each tweak XOR v31 becomes the final-round operand of its lane.
	# Lane 0 reuses $twk0 in place because $in0 is needed for the tail.
3044e71b7053SJung-uk Kim	 vxor		$twk0,$twk0,v31
3045e71b7053SJung-uk Kim
3046e71b7053SJung-uk Kim	vcipher		$out0,$out0,v26
3047e71b7053SJung-uk Kim	lvsr		$inpperm,0,$taillen	# $in5 is no more
3048e71b7053SJung-uk Kim	vcipher		$out1,$out1,v26
3049e71b7053SJung-uk Kim	vcipher		$out2,$out2,v26
3050e71b7053SJung-uk Kim	vcipher		$out3,$out3,v26
3051e71b7053SJung-uk Kim	vcipher		$out4,$out4,v26
3052e71b7053SJung-uk Kim	 vxor		$in1,$twk1,v31
3053e71b7053SJung-uk Kim
3054e71b7053SJung-uk Kim	vcipher		$out0,$out0,v27
3055e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
3056e71b7053SJung-uk Kim	vcipher		$out1,$out1,v27
3057e71b7053SJung-uk Kim	vcipher		$out2,$out2,v27
3058e71b7053SJung-uk Kim	vcipher		$out3,$out3,v27
3059e71b7053SJung-uk Kim	vcipher		$out4,$out4,v27
3060e71b7053SJung-uk Kim	 vxor		$in2,$twk2,v31
3061e71b7053SJung-uk Kim
3062e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3063e71b7053SJung-uk Kim	vcipher		$out0,$out0,v28
3064e71b7053SJung-uk Kim	vcipher		$out1,$out1,v28
3065e71b7053SJung-uk Kim	vcipher		$out2,$out2,v28
3066e71b7053SJung-uk Kim	vcipher		$out3,$out3,v28
3067e71b7053SJung-uk Kim	vcipher		$out4,$out4,v28
3068e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
3069e71b7053SJung-uk Kim	 vxor		$in3,$twk3,v31
3070e71b7053SJung-uk Kim
3071e71b7053SJung-uk Kim	vcipher		$out0,$out0,v29
3072e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
3073e71b7053SJung-uk Kim	vcipher		$out1,$out1,v29
3074e71b7053SJung-uk Kim	vcipher		$out2,$out2,v29
3075e71b7053SJung-uk Kim	vcipher		$out3,$out3,v29
3076e71b7053SJung-uk Kim	vcipher		$out4,$out4,v29
3077e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
3078e71b7053SJung-uk Kim	 vxor		$in4,$twk4,v31
3079e71b7053SJung-uk Kim
3080e71b7053SJung-uk Kim	vcipher		$out0,$out0,v30
3081e71b7053SJung-uk Kim	vperm		$in0,$in0,$in0,$inpperm
3082e71b7053SJung-uk Kim	vcipher		$out1,$out1,v30
3083e71b7053SJung-uk Kim	vcipher		$out2,$out2,v30
3084e71b7053SJung-uk Kim	vcipher		$out3,$out3,v30
3085e71b7053SJung-uk Kim	vcipher		$out4,$out4,v30
3086e71b7053SJung-uk Kim
3087e71b7053SJung-uk Kim	vcipherlast	$out0,$out0,$twk0
3088e71b7053SJung-uk Kim	vcipherlast	$out1,$out1,$in1
3089e71b7053SJung-uk Kim	vcipherlast	$out2,$out2,$in2
3090e71b7053SJung-uk Kim	vcipherlast	$out3,$out3,$in3
3091e71b7053SJung-uk Kim	vcipherlast	$out4,$out4,$in4
3092e71b7053SJung-uk Kim	blr
	# NOTE(review): the words below look like a traceback table - confirm.
3093e71b7053SJung-uk Kim        .long   	0
3094e71b7053SJung-uk Kim        .byte   	0,12,0x14,0,0,0,0,0
3095e71b7053SJung-uk Kim
3096e71b7053SJung-uk Kim.align	5
3097e71b7053SJung-uk Kim_aesp8_xts_decrypt6x:
3098e71b7053SJung-uk Kim	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3099e71b7053SJung-uk Kim	mflr		r11
3100e71b7053SJung-uk Kim	li		r7,`$FRAME+8*16+15`
3101e71b7053SJung-uk Kim	li		r3,`$FRAME+8*16+31`
3102e71b7053SJung-uk Kim	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3103e71b7053SJung-uk Kim	stvx		v20,r7,$sp		# ABI says so
3104e71b7053SJung-uk Kim	addi		r7,r7,32
3105e71b7053SJung-uk Kim	stvx		v21,r3,$sp
3106e71b7053SJung-uk Kim	addi		r3,r3,32
3107e71b7053SJung-uk Kim	stvx		v22,r7,$sp
3108e71b7053SJung-uk Kim	addi		r7,r7,32
3109e71b7053SJung-uk Kim	stvx		v23,r3,$sp
3110e71b7053SJung-uk Kim	addi		r3,r3,32
3111e71b7053SJung-uk Kim	stvx		v24,r7,$sp
3112e71b7053SJung-uk Kim	addi		r7,r7,32
3113e71b7053SJung-uk Kim	stvx		v25,r3,$sp
3114e71b7053SJung-uk Kim	addi		r3,r3,32
3115e71b7053SJung-uk Kim	stvx		v26,r7,$sp
3116e71b7053SJung-uk Kim	addi		r7,r7,32
3117e71b7053SJung-uk Kim	stvx		v27,r3,$sp
3118e71b7053SJung-uk Kim	addi		r3,r3,32
3119e71b7053SJung-uk Kim	stvx		v28,r7,$sp
3120e71b7053SJung-uk Kim	addi		r7,r7,32
3121e71b7053SJung-uk Kim	stvx		v29,r3,$sp
3122e71b7053SJung-uk Kim	addi		r3,r3,32
3123e71b7053SJung-uk Kim	stvx		v30,r7,$sp
3124e71b7053SJung-uk Kim	stvx		v31,r3,$sp
3125e71b7053SJung-uk Kim	li		r0,-1
3126e71b7053SJung-uk Kim	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
3127e71b7053SJung-uk Kim	li		$x10,0x10
3128e71b7053SJung-uk Kim	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3129e71b7053SJung-uk Kim	li		$x20,0x20
3130e71b7053SJung-uk Kim	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3131e71b7053SJung-uk Kim	li		$x30,0x30
3132e71b7053SJung-uk Kim	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3133e71b7053SJung-uk Kim	li		$x40,0x40
3134e71b7053SJung-uk Kim	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3135e71b7053SJung-uk Kim	li		$x50,0x50
3136e71b7053SJung-uk Kim	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3137e71b7053SJung-uk Kim	li		$x60,0x60
3138e71b7053SJung-uk Kim	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3139e71b7053SJung-uk Kim	li		$x70,0x70
3140e71b7053SJung-uk Kim	mtspr		256,r0
3141e71b7053SJung-uk Kim
3142*a7148ab3SEnji Cooper	# Reverse eighty7 to 0x010101..87
3143*a7148ab3SEnji Cooper	xxlor		2, 32+$eighty7, 32+$eighty7
3144*a7148ab3SEnji Cooper	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
3145*a7148ab3SEnji Cooper	xxlor		1, 32+$eighty7, 32+$eighty7
3146*a7148ab3SEnji Cooper
3147*a7148ab3SEnji Cooper	# Load XOR contents (the vpermxor control vector): 0x0f102132435465768798a9bacbdcedfe
3148*a7148ab3SEnji Cooper	mr		$x70, r6
3149*a7148ab3SEnji Cooper	bl		Lconsts
3150*a7148ab3SEnji Cooper	lxvw4x		0, $x40, r6		# load XOR contents
3151*a7148ab3SEnji Cooper	mr		r6, $x70
3152*a7148ab3SEnji Cooper	li		$x70,0x70
3153*a7148ab3SEnji Cooper
3154e71b7053SJung-uk Kim	subi		$rounds,$rounds,3	# -4 in total
3155e71b7053SJung-uk Kim
3156e71b7053SJung-uk Kim	lvx		$rndkey0,$x00,$key1	# load key schedule
3157e71b7053SJung-uk Kim	lvx		v30,$x10,$key1
3158e71b7053SJung-uk Kim	addi		$key1,$key1,0x20
3159e71b7053SJung-uk Kim	lvx		v31,$x00,$key1
3160e71b7053SJung-uk Kim	?vperm		$rndkey0,$rndkey0,v30,$keyperm
3161e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15
3162e71b7053SJung-uk Kim	mtctr		$rounds
3163e71b7053SJung-uk Kim
3164e71b7053SJung-uk KimLoad_xts_dec_key:
3165e71b7053SJung-uk Kim	?vperm		v24,v30,v31,$keyperm
3166e71b7053SJung-uk Kim	lvx		v30,$x10,$key1
3167e71b7053SJung-uk Kim	addi		$key1,$key1,0x20
3168e71b7053SJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[1]
3169e71b7053SJung-uk Kim	?vperm		v25,v31,v30,$keyperm
3170e71b7053SJung-uk Kim	lvx		v31,$x00,$key1
3171e71b7053SJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[2]
3172e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3173e71b7053SJung-uk Kim	bdnz		Load_xts_dec_key
3174e71b7053SJung-uk Kim
3175e71b7053SJung-uk Kim	lvx		v26,$x10,$key1
3176e71b7053SJung-uk Kim	?vperm		v24,v30,v31,$keyperm
3177e71b7053SJung-uk Kim	lvx		v27,$x20,$key1
3178e71b7053SJung-uk Kim	stvx		v24,$x00,$key_		# off-load round[3]
3179e71b7053SJung-uk Kim	?vperm		v25,v31,v26,$keyperm
3180e71b7053SJung-uk Kim	lvx		v28,$x30,$key1
3181e71b7053SJung-uk Kim	stvx		v25,$x10,$key_		# off-load round[4]
3182e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3183e71b7053SJung-uk Kim	?vperm		v26,v26,v27,$keyperm
3184e71b7053SJung-uk Kim	lvx		v29,$x40,$key1
3185e71b7053SJung-uk Kim	?vperm		v27,v27,v28,$keyperm
3186e71b7053SJung-uk Kim	lvx		v30,$x50,$key1
3187e71b7053SJung-uk Kim	?vperm		v28,v28,v29,$keyperm
3188e71b7053SJung-uk Kim	lvx		v31,$x60,$key1
3189e71b7053SJung-uk Kim	?vperm		v29,v29,v30,$keyperm
3190e71b7053SJung-uk Kim	lvx		$twk5,$x70,$key1	# borrow $twk5
3191e71b7053SJung-uk Kim	?vperm		v30,v30,v31,$keyperm
3192e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# pre-load round[1]
3193e71b7053SJung-uk Kim	?vperm		v31,v31,$twk5,$keyperm
3194e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# pre-load round[2]
3195e71b7053SJung-uk Kim
3196e71b7053SJung-uk Kim	 vperm		$in0,$inout,$inptail,$inpperm
3197e71b7053SJung-uk Kim	 subi		$inp,$inp,31		# undo "caller"
3198e71b7053SJung-uk Kim	vxor		$twk0,$tweak,$rndkey0
3199e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3200e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3201e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3202e71b7053SJung-uk Kim	 vxor		$out0,$in0,$twk0
3203*a7148ab3SEnji Cooper	xxlor		32+$in1, 0, 0
3204*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in1
3205e71b7053SJung-uk Kim
3206e71b7053SJung-uk Kim	 lvx_u		$in1,$x10,$inp
3207e71b7053SJung-uk Kim	vxor		$twk1,$tweak,$rndkey0
3208e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3209e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3210e71b7053SJung-uk Kim	 le?vperm	$in1,$in1,$in1,$leperm
3211e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3212e71b7053SJung-uk Kim	 vxor		$out1,$in1,$twk1
3213*a7148ab3SEnji Cooper	xxlor		32+$in2, 0, 0
3214*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in2
3215e71b7053SJung-uk Kim
3216e71b7053SJung-uk Kim	 lvx_u		$in2,$x20,$inp
3217e71b7053SJung-uk Kim	 andi.		$taillen,$len,15
3218e71b7053SJung-uk Kim	vxor		$twk2,$tweak,$rndkey0
3219e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3220e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3221e71b7053SJung-uk Kim	 le?vperm	$in2,$in2,$in2,$leperm
3222e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3223e71b7053SJung-uk Kim	 vxor		$out2,$in2,$twk2
3224*a7148ab3SEnji Cooper	xxlor		32+$in3, 0, 0
3225*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in3
3226e71b7053SJung-uk Kim
3227e71b7053SJung-uk Kim	 lvx_u		$in3,$x30,$inp
3228e71b7053SJung-uk Kim	 sub		$len,$len,$taillen
3229e71b7053SJung-uk Kim	vxor		$twk3,$tweak,$rndkey0
3230e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3231e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3232e71b7053SJung-uk Kim	 le?vperm	$in3,$in3,$in3,$leperm
3233e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3234e71b7053SJung-uk Kim	 vxor		$out3,$in3,$twk3
3235*a7148ab3SEnji Cooper	xxlor		32+$in4, 0, 0
3236*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in4
3237e71b7053SJung-uk Kim
3238e71b7053SJung-uk Kim	 lvx_u		$in4,$x40,$inp
3239e71b7053SJung-uk Kim	 subi		$len,$len,0x60
3240e71b7053SJung-uk Kim	vxor		$twk4,$tweak,$rndkey0
3241e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3242e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3243e71b7053SJung-uk Kim	 le?vperm	$in4,$in4,$in4,$leperm
3244e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3245e71b7053SJung-uk Kim	 vxor		$out4,$in4,$twk4
3246*a7148ab3SEnji Cooper	xxlor		32+$in5, 0, 0
3247*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in5
3248e71b7053SJung-uk Kim
3249e71b7053SJung-uk Kim	 lvx_u		$in5,$x50,$inp
3250e71b7053SJung-uk Kim	 addi		$inp,$inp,0x60
3251e71b7053SJung-uk Kim	vxor		$twk5,$tweak,$rndkey0
3252e71b7053SJung-uk Kim	vsrab		$tmp,$tweak,$seven	# next tweak value
3253e71b7053SJung-uk Kim	vaddubm		$tweak,$tweak,$tweak
3254e71b7053SJung-uk Kim	 le?vperm	$in5,$in5,$in5,$leperm
3255e71b7053SJung-uk Kim	vand		$tmp,$tmp,$eighty7
3256e71b7053SJung-uk Kim	 vxor		$out5,$in5,$twk5
3257*a7148ab3SEnji Cooper	xxlor		32+$in0, 0, 0
3258*a7148ab3SEnji Cooper	vpermxor	$tweak, $tweak, $tmp, $in0
3259e71b7053SJung-uk Kim
3260e71b7053SJung-uk Kim	vxor		v31,v31,$rndkey0
3261e71b7053SJung-uk Kim	mtctr		$rounds
3262e71b7053SJung-uk Kim	b		Loop_xts_dec6x
3263e71b7053SJung-uk Kim
3264e71b7053SJung-uk Kim.align	5
3265e71b7053SJung-uk KimLoop_xts_dec6x:
3266e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3267e71b7053SJung-uk Kim	vncipher	$out1,$out1,v24
3268e71b7053SJung-uk Kim	vncipher	$out2,$out2,v24
3269e71b7053SJung-uk Kim	vncipher	$out3,$out3,v24
3270e71b7053SJung-uk Kim	vncipher	$out4,$out4,v24
3271e71b7053SJung-uk Kim	vncipher	$out5,$out5,v24
3272e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
3273e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3274e71b7053SJung-uk Kim
3275e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3276e71b7053SJung-uk Kim	vncipher	$out1,$out1,v25
3277e71b7053SJung-uk Kim	vncipher	$out2,$out2,v25
3278e71b7053SJung-uk Kim	vncipher	$out3,$out3,v25
3279e71b7053SJung-uk Kim	vncipher	$out4,$out4,v25
3280e71b7053SJung-uk Kim	vncipher	$out5,$out5,v25
3281e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
3282e71b7053SJung-uk Kim	bdnz		Loop_xts_dec6x
3283e71b7053SJung-uk Kim
3284*a7148ab3SEnji Cooper	xxlor		32+$eighty7, 1, 1
3285*a7148ab3SEnji Cooper
3286e71b7053SJung-uk Kim	subic		$len,$len,96		# $len-=96
3287e71b7053SJung-uk Kim	 vxor		$in0,$twk0,v31		# xor with last round key
3288e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3289e71b7053SJung-uk Kim	vncipher	$out1,$out1,v24
3290e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3291e71b7053SJung-uk Kim	 vxor		$twk0,$tweak,$rndkey0
3292e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3293e71b7053SJung-uk Kim	vncipher	$out2,$out2,v24
3294e71b7053SJung-uk Kim	vncipher	$out3,$out3,v24
3295e71b7053SJung-uk Kim	vncipher	$out4,$out4,v24
3296e71b7053SJung-uk Kim	vncipher	$out5,$out5,v24
3297e71b7053SJung-uk Kim
3298e71b7053SJung-uk Kim	subfe.		r0,r0,r0		# borrow?-1:0
3299e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3300e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3301e71b7053SJung-uk Kim	vncipher	$out1,$out1,v25
3302*a7148ab3SEnji Cooper	 xxlor		32+$in1, 0, 0
3303*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in1
3304e71b7053SJung-uk Kim	vncipher	$out2,$out2,v25
3305e71b7053SJung-uk Kim	vncipher	$out3,$out3,v25
3306e71b7053SJung-uk Kim	 vxor		$in1,$twk1,v31
3307e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3308e71b7053SJung-uk Kim	 vxor		$twk1,$tweak,$rndkey0
3309e71b7053SJung-uk Kim	vncipher	$out4,$out4,v25
3310e71b7053SJung-uk Kim	vncipher	$out5,$out5,v25
3311e71b7053SJung-uk Kim
3312e71b7053SJung-uk Kim	and		r0,r0,$len
3313e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3314e71b7053SJung-uk Kim	vncipher	$out0,$out0,v26
3315e71b7053SJung-uk Kim	vncipher	$out1,$out1,v26
3316e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3317e71b7053SJung-uk Kim	vncipher	$out2,$out2,v26
3318e71b7053SJung-uk Kim	vncipher	$out3,$out3,v26
3319*a7148ab3SEnji Cooper	 xxlor		32+$in2, 0, 0
3320*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in2
3321e71b7053SJung-uk Kim	vncipher	$out4,$out4,v26
3322e71b7053SJung-uk Kim	vncipher	$out5,$out5,v26
3323e71b7053SJung-uk Kim
3324e71b7053SJung-uk Kim	add		$inp,$inp,r0		# $inp is adjusted in such
3325e71b7053SJung-uk Kim						# way that at exit from the
3326e71b7053SJung-uk Kim						# loop inX-in5 are loaded
3327e71b7053SJung-uk Kim						# with last "words"
3328e71b7053SJung-uk Kim	 vxor		$in2,$twk2,v31
3329e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3330e71b7053SJung-uk Kim	 vxor		$twk2,$tweak,$rndkey0
3331e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3332e71b7053SJung-uk Kim	vncipher	$out0,$out0,v27
3333e71b7053SJung-uk Kim	vncipher	$out1,$out1,v27
3334e71b7053SJung-uk Kim	vncipher	$out2,$out2,v27
3335e71b7053SJung-uk Kim	vncipher	$out3,$out3,v27
3336e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3337e71b7053SJung-uk Kim	vncipher	$out4,$out4,v27
3338e71b7053SJung-uk Kim	vncipher	$out5,$out5,v27
3339e71b7053SJung-uk Kim
3340e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3341*a7148ab3SEnji Cooper	 xxlor		32+$in3, 0, 0
3342*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in3
3343e71b7053SJung-uk Kim	vncipher	$out0,$out0,v28
3344e71b7053SJung-uk Kim	vncipher	$out1,$out1,v28
3345e71b7053SJung-uk Kim	 vxor		$in3,$twk3,v31
3346e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3347e71b7053SJung-uk Kim	 vxor		$twk3,$tweak,$rndkey0
3348e71b7053SJung-uk Kim	vncipher	$out2,$out2,v28
3349e71b7053SJung-uk Kim	vncipher	$out3,$out3,v28
3350e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3351e71b7053SJung-uk Kim	vncipher	$out4,$out4,v28
3352e71b7053SJung-uk Kim	vncipher	$out5,$out5,v28
3353e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
3354e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3355e71b7053SJung-uk Kim
3356e71b7053SJung-uk Kim	vncipher	$out0,$out0,v29
3357e71b7053SJung-uk Kim	vncipher	$out1,$out1,v29
3358*a7148ab3SEnji Cooper	 xxlor		32+$in4, 0, 0
3359*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in4
3360e71b7053SJung-uk Kim	vncipher	$out2,$out2,v29
3361e71b7053SJung-uk Kim	vncipher	$out3,$out3,v29
3362e71b7053SJung-uk Kim	 vxor		$in4,$twk4,v31
3363e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3364e71b7053SJung-uk Kim	 vxor		$twk4,$tweak,$rndkey0
3365e71b7053SJung-uk Kim	vncipher	$out4,$out4,v29
3366e71b7053SJung-uk Kim	vncipher	$out5,$out5,v29
3367e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
3368e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3369e71b7053SJung-uk Kim
3370e71b7053SJung-uk Kim	vncipher	$out0,$out0,v30
3371e71b7053SJung-uk Kim	vncipher	$out1,$out1,v30
3372e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3373e71b7053SJung-uk Kim	vncipher	$out2,$out2,v30
3374e71b7053SJung-uk Kim	vncipher	$out3,$out3,v30
3375*a7148ab3SEnji Cooper	 xxlor		32+$in5, 0, 0
3376*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in5
3377e71b7053SJung-uk Kim	vncipher	$out4,$out4,v30
3378e71b7053SJung-uk Kim	vncipher	$out5,$out5,v30
3379e71b7053SJung-uk Kim	 vxor		$in5,$twk5,v31
3380e71b7053SJung-uk Kim	 vsrab		$tmp,$tweak,$seven	# next tweak value
3381e71b7053SJung-uk Kim	 vxor		$twk5,$tweak,$rndkey0
3382e71b7053SJung-uk Kim
3383e71b7053SJung-uk Kim	vncipherlast	$out0,$out0,$in0
3384e71b7053SJung-uk Kim	 lvx_u		$in0,$x00,$inp		# load next input block
3385e71b7053SJung-uk Kim	 vaddubm	$tweak,$tweak,$tweak
3386e71b7053SJung-uk Kim	vncipherlast	$out1,$out1,$in1
3387e71b7053SJung-uk Kim	 lvx_u		$in1,$x10,$inp
3388e71b7053SJung-uk Kim	vncipherlast	$out2,$out2,$in2
3389e71b7053SJung-uk Kim	 le?vperm	$in0,$in0,$in0,$leperm
3390e71b7053SJung-uk Kim	 lvx_u		$in2,$x20,$inp
3391e71b7053SJung-uk Kim	 vand		$tmp,$tmp,$eighty7
3392e71b7053SJung-uk Kim	vncipherlast	$out3,$out3,$in3
3393e71b7053SJung-uk Kim	 le?vperm	$in1,$in1,$in1,$leperm
3394e71b7053SJung-uk Kim	 lvx_u		$in3,$x30,$inp
3395e71b7053SJung-uk Kim	vncipherlast	$out4,$out4,$in4
3396e71b7053SJung-uk Kim	 le?vperm	$in2,$in2,$in2,$leperm
3397e71b7053SJung-uk Kim	 lvx_u		$in4,$x40,$inp
3398*a7148ab3SEnji Cooper	 xxlor		10, 32+$in0, 32+$in0
3399*a7148ab3SEnji Cooper	 xxlor		32+$in0, 0, 0
3400*a7148ab3SEnji Cooper	 vpermxor	$tweak, $tweak, $tmp, $in0
3401*a7148ab3SEnji Cooper	 xxlor		32+$in0, 10, 10
3402e71b7053SJung-uk Kim	vncipherlast	$out5,$out5,$in5
3403e71b7053SJung-uk Kim	 le?vperm	$in3,$in3,$in3,$leperm
3404e71b7053SJung-uk Kim	 lvx_u		$in5,$x50,$inp
3405e71b7053SJung-uk Kim	 addi		$inp,$inp,0x60
3406e71b7053SJung-uk Kim	 le?vperm	$in4,$in4,$in4,$leperm
3407e71b7053SJung-uk Kim	 le?vperm	$in5,$in5,$in5,$leperm
3408e71b7053SJung-uk Kim
3409e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3410e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
3411e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3412e71b7053SJung-uk Kim	 vxor		$out0,$in0,$twk0
3413e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
3414e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
3415e71b7053SJung-uk Kim	 vxor		$out1,$in1,$twk1
3416e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
3417e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
3418e71b7053SJung-uk Kim	 vxor		$out2,$in2,$twk2
3419e71b7053SJung-uk Kim	le?vperm	$out4,$out4,$out4,$leperm
3420e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
3421e71b7053SJung-uk Kim	 vxor		$out3,$in3,$twk3
3422e71b7053SJung-uk Kim	le?vperm	$out5,$out5,$out5,$leperm
3423e71b7053SJung-uk Kim	stvx_u		$out4,$x40,$out
3424e71b7053SJung-uk Kim	 vxor		$out4,$in4,$twk4
3425e71b7053SJung-uk Kim	stvx_u		$out5,$x50,$out
3426e71b7053SJung-uk Kim	 vxor		$out5,$in5,$twk5
3427e71b7053SJung-uk Kim	addi		$out,$out,0x60
3428e71b7053SJung-uk Kim
3429e71b7053SJung-uk Kim	mtctr		$rounds
3430e71b7053SJung-uk Kim	beq		Loop_xts_dec6x		# did $len-=96 borrow?
3431e71b7053SJung-uk Kim
3432*a7148ab3SEnji Cooper	xxlor		32+$eighty7, 2, 2
3433*a7148ab3SEnji Cooper
3434e71b7053SJung-uk Kim	addic.		$len,$len,0x60
3435e71b7053SJung-uk Kim	beq		Lxts_dec6x_zero
3436e71b7053SJung-uk Kim	cmpwi		$len,0x20
3437e71b7053SJung-uk Kim	blt		Lxts_dec6x_one
3438e71b7053SJung-uk Kim	nop
3439e71b7053SJung-uk Kim	beq		Lxts_dec6x_two
3440e71b7053SJung-uk Kim	cmpwi		$len,0x40
3441e71b7053SJung-uk Kim	blt		Lxts_dec6x_three
3442e71b7053SJung-uk Kim	nop
3443e71b7053SJung-uk Kim	beq		Lxts_dec6x_four
3444e71b7053SJung-uk Kim
3445e71b7053SJung-uk KimLxts_dec6x_five:
3446e71b7053SJung-uk Kim	vxor		$out0,$in1,$twk0
3447e71b7053SJung-uk Kim	vxor		$out1,$in2,$twk1
3448e71b7053SJung-uk Kim	vxor		$out2,$in3,$twk2
3449e71b7053SJung-uk Kim	vxor		$out3,$in4,$twk3
3450e71b7053SJung-uk Kim	vxor		$out4,$in5,$twk4
3451e71b7053SJung-uk Kim
3452e71b7053SJung-uk Kim	bl		_aesp8_xts_dec5x
3453e71b7053SJung-uk Kim
3454e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3455e71b7053SJung-uk Kim	vmr		$twk0,$twk5		# unused tweak
3456e71b7053SJung-uk Kim	vxor		$twk1,$tweak,$rndkey0
3457e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
3458e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3459e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk1
3460e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
3461e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
3462e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
3463e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
3464e71b7053SJung-uk Kim	le?vperm	$out4,$out4,$out4,$leperm
3465e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
3466e71b7053SJung-uk Kim	stvx_u		$out4,$x40,$out
3467e71b7053SJung-uk Kim	addi		$out,$out,0x50
3468e71b7053SJung-uk Kim	bne		Lxts_dec6x_steal
3469e71b7053SJung-uk Kim	b		Lxts_dec6x_done
3470e71b7053SJung-uk Kim
3471e71b7053SJung-uk Kim.align	4
3472e71b7053SJung-uk KimLxts_dec6x_four:
3473e71b7053SJung-uk Kim	vxor		$out0,$in2,$twk0
3474e71b7053SJung-uk Kim	vxor		$out1,$in3,$twk1
3475e71b7053SJung-uk Kim	vxor		$out2,$in4,$twk2
3476e71b7053SJung-uk Kim	vxor		$out3,$in5,$twk3
3477e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
3478e71b7053SJung-uk Kim
3479e71b7053SJung-uk Kim	bl		_aesp8_xts_dec5x
3480e71b7053SJung-uk Kim
3481e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3482e71b7053SJung-uk Kim	vmr		$twk0,$twk4		# unused tweak
3483e71b7053SJung-uk Kim	vmr		$twk1,$twk5
3484e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
3485e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3486e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk5
3487e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
3488e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
3489e71b7053SJung-uk Kim	le?vperm	$out3,$out3,$out3,$leperm
3490e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
3491e71b7053SJung-uk Kim	stvx_u		$out3,$x30,$out
3492e71b7053SJung-uk Kim	addi		$out,$out,0x40
3493e71b7053SJung-uk Kim	bne		Lxts_dec6x_steal
3494e71b7053SJung-uk Kim	b		Lxts_dec6x_done
3495e71b7053SJung-uk Kim
3496e71b7053SJung-uk Kim.align	4
3497e71b7053SJung-uk KimLxts_dec6x_three:
3498e71b7053SJung-uk Kim	vxor		$out0,$in3,$twk0
3499e71b7053SJung-uk Kim	vxor		$out1,$in4,$twk1
3500e71b7053SJung-uk Kim	vxor		$out2,$in5,$twk2
3501e71b7053SJung-uk Kim	vxor		$out3,$out3,$out3
3502e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
3503e71b7053SJung-uk Kim
3504e71b7053SJung-uk Kim	bl		_aesp8_xts_dec5x
3505e71b7053SJung-uk Kim
3506e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3507e71b7053SJung-uk Kim	vmr		$twk0,$twk3		# unused tweak
3508e71b7053SJung-uk Kim	vmr		$twk1,$twk4
3509e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
3510e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3511e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk4
3512e71b7053SJung-uk Kim	le?vperm	$out2,$out2,$out2,$leperm
3513e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
3514e71b7053SJung-uk Kim	stvx_u		$out2,$x20,$out
3515e71b7053SJung-uk Kim	addi		$out,$out,0x30
3516e71b7053SJung-uk Kim	bne		Lxts_dec6x_steal
3517e71b7053SJung-uk Kim	b		Lxts_dec6x_done
3518e71b7053SJung-uk Kim
3519e71b7053SJung-uk Kim.align	4
3520e71b7053SJung-uk KimLxts_dec6x_two:
3521e71b7053SJung-uk Kim	vxor		$out0,$in4,$twk0
3522e71b7053SJung-uk Kim	vxor		$out1,$in5,$twk1
3523e71b7053SJung-uk Kim	vxor		$out2,$out2,$out2
3524e71b7053SJung-uk Kim	vxor		$out3,$out3,$out3
3525e71b7053SJung-uk Kim	vxor		$out4,$out4,$out4
3526e71b7053SJung-uk Kim
3527e71b7053SJung-uk Kim	bl		_aesp8_xts_dec5x
3528e71b7053SJung-uk Kim
3529e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3530e71b7053SJung-uk Kim	vmr		$twk0,$twk2		# unused tweak
3531e71b7053SJung-uk Kim	vmr		$twk1,$twk3
3532e71b7053SJung-uk Kim	le?vperm	$out1,$out1,$out1,$leperm
3533e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3534e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk3
3535e71b7053SJung-uk Kim	stvx_u		$out1,$x10,$out
3536e71b7053SJung-uk Kim	addi		$out,$out,0x20
3537e71b7053SJung-uk Kim	bne		Lxts_dec6x_steal
3538e71b7053SJung-uk Kim	b		Lxts_dec6x_done
3539e71b7053SJung-uk Kim
3540e71b7053SJung-uk Kim.align	4
3541e71b7053SJung-uk KimLxts_dec6x_one:
3542e71b7053SJung-uk Kim	vxor		$out0,$in5,$twk0
3543e71b7053SJung-uk Kim	nop
3544e71b7053SJung-uk KimLoop_xts_dec1x:
3545e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3546e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
3547e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3548e71b7053SJung-uk Kim
3549e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3550e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
3551e71b7053SJung-uk Kim	bdnz		Loop_xts_dec1x
3552e71b7053SJung-uk Kim
3553e71b7053SJung-uk Kim	subi		r0,$taillen,1
3554e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3555e71b7053SJung-uk Kim
3556e71b7053SJung-uk Kim	andi.		r0,r0,16
3557e71b7053SJung-uk Kim	cmpwi		$taillen,0
3558e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3559e71b7053SJung-uk Kim
3560e71b7053SJung-uk Kim	sub		$inp,$inp,r0
3561e71b7053SJung-uk Kim	vncipher	$out0,$out0,v26
3562e71b7053SJung-uk Kim
3563e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
3564e71b7053SJung-uk Kim	vncipher	$out0,$out0,v27
3565e71b7053SJung-uk Kim
3566e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3567e71b7053SJung-uk Kim	vncipher	$out0,$out0,v28
3568e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
3569e71b7053SJung-uk Kim
3570e71b7053SJung-uk Kim	vncipher	$out0,$out0,v29
3571e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
3572e71b7053SJung-uk Kim	 vxor		$twk0,$twk0,v31
3573e71b7053SJung-uk Kim
3574e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
3575e71b7053SJung-uk Kim	vncipher	$out0,$out0,v30
3576e71b7053SJung-uk Kim
3577e71b7053SJung-uk Kim	mtctr		$rounds
3578e71b7053SJung-uk Kim	vncipherlast	$out0,$out0,$twk0
3579e71b7053SJung-uk Kim
3580e71b7053SJung-uk Kim	vmr		$twk0,$twk1		# unused tweak
3581e71b7053SJung-uk Kim	vmr		$twk1,$twk2
3582e71b7053SJung-uk Kim	le?vperm	$out0,$out0,$out0,$leperm
3583e71b7053SJung-uk Kim	stvx_u		$out0,$x00,$out		# store output
3584e71b7053SJung-uk Kim	addi		$out,$out,0x10
3585e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk2
3586e71b7053SJung-uk Kim	bne		Lxts_dec6x_steal
3587e71b7053SJung-uk Kim	b		Lxts_dec6x_done
3588e71b7053SJung-uk Kim
3589e71b7053SJung-uk Kim.align	4
3590e71b7053SJung-uk KimLxts_dec6x_zero:
3591e71b7053SJung-uk Kim	cmpwi		$taillen,0
3592e71b7053SJung-uk Kim	beq		Lxts_dec6x_done
3593e71b7053SJung-uk Kim
3594e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
3595e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
3596e71b7053SJung-uk Kim	vxor		$out0,$in0,$twk1
3597e71b7053SJung-uk KimLxts_dec6x_steal:
3598e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3599e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
3600e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3601e71b7053SJung-uk Kim
3602e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3603e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
3604e71b7053SJung-uk Kim	bdnz		Lxts_dec6x_steal
3605e71b7053SJung-uk Kim
3606e71b7053SJung-uk Kim	add		$inp,$inp,$taillen
3607e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3608e71b7053SJung-uk Kim
3609e71b7053SJung-uk Kim	cmpwi		$taillen,0
3610e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3611e71b7053SJung-uk Kim
3612e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
3613e71b7053SJung-uk Kim	vncipher	$out0,$out0,v26
3614e71b7053SJung-uk Kim
3615e71b7053SJung-uk Kim	lvsr		$inpperm,0,$taillen	# $in5 is no more
3616e71b7053SJung-uk Kim	vncipher	$out0,$out0,v27
3617e71b7053SJung-uk Kim
3618e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3619e71b7053SJung-uk Kim	vncipher	$out0,$out0,v28
3620e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
3621e71b7053SJung-uk Kim
3622e71b7053SJung-uk Kim	vncipher	$out0,$out0,v29
3623e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
3624e71b7053SJung-uk Kim	 vxor		$twk1,$twk1,v31
3625e71b7053SJung-uk Kim
3626e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
3627e71b7053SJung-uk Kim	vncipher	$out0,$out0,v30
3628e71b7053SJung-uk Kim
3629e71b7053SJung-uk Kim	vperm		$in0,$in0,$in0,$inpperm
3630e71b7053SJung-uk Kim	vncipherlast	$tmp,$out0,$twk1
3631e71b7053SJung-uk Kim
3632e71b7053SJung-uk Kim	le?vperm	$out0,$tmp,$tmp,$leperm
3633e71b7053SJung-uk Kim	le?stvx_u	$out0,0,$out
3634e71b7053SJung-uk Kim	be?stvx_u	$tmp,0,$out
3635e71b7053SJung-uk Kim
3636e71b7053SJung-uk Kim	vxor		$out0,$out0,$out0
3637e71b7053SJung-uk Kim	vspltisb	$out1,-1
3638e71b7053SJung-uk Kim	vperm		$out0,$out0,$out1,$inpperm
3639e71b7053SJung-uk Kim	vsel		$out0,$in0,$tmp,$out0
3640e71b7053SJung-uk Kim	vxor		$out0,$out0,$twk0
3641e71b7053SJung-uk Kim
3642e71b7053SJung-uk Kim	subi		r30,$out,1
3643e71b7053SJung-uk Kim	mtctr		$taillen
3644e71b7053SJung-uk KimLoop_xts_dec6x_steal:
3645e71b7053SJung-uk Kim	lbzu		r0,1(r30)
3646e71b7053SJung-uk Kim	stb		r0,16(r30)
3647e71b7053SJung-uk Kim	bdnz		Loop_xts_dec6x_steal
3648e71b7053SJung-uk Kim
3649e71b7053SJung-uk Kim	li		$taillen,0
3650e71b7053SJung-uk Kim	mtctr		$rounds
3651e71b7053SJung-uk Kim	b		Loop_xts_dec1x		# one more time...
3652e71b7053SJung-uk Kim
3653e71b7053SJung-uk Kim.align	4
3654e71b7053SJung-uk KimLxts_dec6x_done:
3655e71b7053SJung-uk Kim	${UCMP}i	$ivp,0
3656e71b7053SJung-uk Kim	beq		Lxts_dec6x_ret
3657e71b7053SJung-uk Kim
3658e71b7053SJung-uk Kim	vxor		$tweak,$twk0,$rndkey0
3659e71b7053SJung-uk Kim	le?vperm	$tweak,$tweak,$tweak,$leperm
3660e71b7053SJung-uk Kim	stvx_u		$tweak,0,$ivp
3661e71b7053SJung-uk Kim
3662e71b7053SJung-uk KimLxts_dec6x_ret:
3663e71b7053SJung-uk Kim	mtlr		r11
3664e71b7053SJung-uk Kim	li		r10,`$FRAME+15`
3665e71b7053SJung-uk Kim	li		r11,`$FRAME+31`
3666e71b7053SJung-uk Kim	stvx		$seven,r10,$sp		# wipe copies of round keys
3667e71b7053SJung-uk Kim	addi		r10,r10,32
3668e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
3669e71b7053SJung-uk Kim	addi		r11,r11,32
3670e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
3671e71b7053SJung-uk Kim	addi		r10,r10,32
3672e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
3673e71b7053SJung-uk Kim	addi		r11,r11,32
3674e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
3675e71b7053SJung-uk Kim	addi		r10,r10,32
3676e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
3677e71b7053SJung-uk Kim	addi		r11,r11,32
3678e71b7053SJung-uk Kim	stvx		$seven,r10,$sp
3679e71b7053SJung-uk Kim	addi		r10,r10,32
3680e71b7053SJung-uk Kim	stvx		$seven,r11,$sp
3681e71b7053SJung-uk Kim	addi		r11,r11,32
3682e71b7053SJung-uk Kim
3683e71b7053SJung-uk Kim	mtspr		256,$vrsave
3684e71b7053SJung-uk Kim	lvx		v20,r10,$sp		# ABI says so
3685e71b7053SJung-uk Kim	addi		r10,r10,32
3686e71b7053SJung-uk Kim	lvx		v21,r11,$sp
3687e71b7053SJung-uk Kim	addi		r11,r11,32
3688e71b7053SJung-uk Kim	lvx		v22,r10,$sp
3689e71b7053SJung-uk Kim	addi		r10,r10,32
3690e71b7053SJung-uk Kim	lvx		v23,r11,$sp
3691e71b7053SJung-uk Kim	addi		r11,r11,32
3692e71b7053SJung-uk Kim	lvx		v24,r10,$sp
3693e71b7053SJung-uk Kim	addi		r10,r10,32
3694e71b7053SJung-uk Kim	lvx		v25,r11,$sp
3695e71b7053SJung-uk Kim	addi		r11,r11,32
3696e71b7053SJung-uk Kim	lvx		v26,r10,$sp
3697e71b7053SJung-uk Kim	addi		r10,r10,32
3698e71b7053SJung-uk Kim	lvx		v27,r11,$sp
3699e71b7053SJung-uk Kim	addi		r11,r11,32
3700e71b7053SJung-uk Kim	lvx		v28,r10,$sp
3701e71b7053SJung-uk Kim	addi		r10,r10,32
3702e71b7053SJung-uk Kim	lvx		v29,r11,$sp
3703e71b7053SJung-uk Kim	addi		r11,r11,32
3704e71b7053SJung-uk Kim	lvx		v30,r10,$sp
3705e71b7053SJung-uk Kim	lvx		v31,r11,$sp
3706e71b7053SJung-uk Kim	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3707e71b7053SJung-uk Kim	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3708e71b7053SJung-uk Kim	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3709e71b7053SJung-uk Kim	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3710e71b7053SJung-uk Kim	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3711e71b7053SJung-uk Kim	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3712e71b7053SJung-uk Kim	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3713e71b7053SJung-uk Kim	blr
3714e71b7053SJung-uk Kim	.long		0
3715e71b7053SJung-uk Kim	.byte		0,12,0x04,1,0x80,6,6,0
3716e71b7053SJung-uk Kim	.long		0
3717e71b7053SJung-uk Kim
# _aesp8_xts_dec5x - helper shared by the 1..5 block tails above.
# Runs the AES decryption rounds on the five lanes $out0-$out4, which
# the callers have already xored with their tweaks (lanes that are not
# needed are passed in zeroed).
# On entry: CTR holds the round-loop count, v24/v25 hold pre-loaded
# round keys, further round keys are fetched through $key_.
# On exit:
#   $out0-$out4  results of vncipherlast for the five lanes
#   $in0         16 bytes loaded from $inp (after the adjustment below)
#   $twk0        overwritten with $twk0 xor v31
#   $in1-$in4    overwritten with $twk1-$twk4 xor v31 ($twk1-$twk4 kept)
#   CR0          compare of $taillen with 0, tested by the callers with bne
#   CTR, $key_, v24, v25 are re-primed exactly as the 6x loop does.
3718e71b7053SJung-uk Kim.align	5
3719e71b7053SJung-uk Kim_aesp8_xts_dec5x:
	# round loop: two rounds per iteration, next keys loaded on the fly
3720e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3721e71b7053SJung-uk Kim	vncipher	$out1,$out1,v24
3722e71b7053SJung-uk Kim	vncipher	$out2,$out2,v24
3723e71b7053SJung-uk Kim	vncipher	$out3,$out3,v24
3724e71b7053SJung-uk Kim	vncipher	$out4,$out4,v24
3725e71b7053SJung-uk Kim	lvx		v24,$x20,$key_		# round[3]
3726e71b7053SJung-uk Kim	addi		$key_,$key_,0x20
3727e71b7053SJung-uk Kim
3728e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3729e71b7053SJung-uk Kim	vncipher	$out1,$out1,v25
3730e71b7053SJung-uk Kim	vncipher	$out2,$out2,v25
3731e71b7053SJung-uk Kim	vncipher	$out3,$out3,v25
3732e71b7053SJung-uk Kim	vncipher	$out4,$out4,v25
3733e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# round[4]
3734e71b7053SJung-uk Kim	bdnz		_aesp8_xts_dec5x
3735e71b7053SJung-uk Kim
	# r0 = ($taillen-1) & 16, computed over the next few instructions:
	# 16 when $taillen is 0, and 0 for $taillen in 1..16
	# NOTE(review): presumably $taillen is always below 16 - confirm
3736e71b7053SJung-uk Kim	subi		r0,$taillen,1
3737e71b7053SJung-uk Kim	vncipher	$out0,$out0,v24
3738e71b7053SJung-uk Kim	vncipher	$out1,$out1,v24
3739e71b7053SJung-uk Kim	vncipher	$out2,$out2,v24
3740e71b7053SJung-uk Kim	vncipher	$out3,$out3,v24
3741e71b7053SJung-uk Kim	vncipher	$out4,$out4,v24
3742e71b7053SJung-uk Kim
3743e71b7053SJung-uk Kim	andi.		r0,r0,16
	# cmpwi comes after andi. so CR0 reflects $taillen at return
3744e71b7053SJung-uk Kim	cmpwi		$taillen,0
3745e71b7053SJung-uk Kim	vncipher	$out0,$out0,v25
3746e71b7053SJung-uk Kim	vncipher	$out1,$out1,v25
3747e71b7053SJung-uk Kim	vncipher	$out2,$out2,v25
3748e71b7053SJung-uk Kim	vncipher	$out3,$out3,v25
3749e71b7053SJung-uk Kim	vncipher	$out4,$out4,v25
	# fold v31 into the tweak so vncipherlast applies both at once;
	# note this destroys $twk0, the other tweaks go to $in1-$in4
3750e71b7053SJung-uk Kim	 vxor		$twk0,$twk0,v31
3751e71b7053SJung-uk Kim
	# step $inp back by r0 (16 bytes when there is no tail, else 0)
3752e71b7053SJung-uk Kim	sub		$inp,$inp,r0
3753e71b7053SJung-uk Kim	vncipher	$out0,$out0,v26
3754e71b7053SJung-uk Kim	vncipher	$out1,$out1,v26
3755e71b7053SJung-uk Kim	vncipher	$out2,$out2,v26
3756e71b7053SJung-uk Kim	vncipher	$out3,$out3,v26
3757e71b7053SJung-uk Kim	vncipher	$out4,$out4,v26
3758e71b7053SJung-uk Kim	 vxor		$in1,$twk1,v31
3759e71b7053SJung-uk Kim
3760e71b7053SJung-uk Kim	vncipher	$out0,$out0,v27
	# load the block the caller will process next into $in0
3761e71b7053SJung-uk Kim	lvx_u		$in0,0,$inp
3762e71b7053SJung-uk Kim	vncipher	$out1,$out1,v27
3763e71b7053SJung-uk Kim	vncipher	$out2,$out2,v27
3764e71b7053SJung-uk Kim	vncipher	$out3,$out3,v27
3765e71b7053SJung-uk Kim	vncipher	$out4,$out4,v27
3766e71b7053SJung-uk Kim	 vxor		$in2,$twk2,v31
3767e71b7053SJung-uk Kim
3768e71b7053SJung-uk Kim	addi		$key_,$sp,$FRAME+15	# rewind $key_
3769e71b7053SJung-uk Kim	vncipher	$out0,$out0,v28
3770e71b7053SJung-uk Kim	vncipher	$out1,$out1,v28
3771e71b7053SJung-uk Kim	vncipher	$out2,$out2,v28
3772e71b7053SJung-uk Kim	vncipher	$out3,$out3,v28
3773e71b7053SJung-uk Kim	vncipher	$out4,$out4,v28
3774e71b7053SJung-uk Kim	lvx		v24,$x00,$key_		# re-pre-load round[1]
3775e71b7053SJung-uk Kim	 vxor		$in3,$twk3,v31
3776e71b7053SJung-uk Kim
3777e71b7053SJung-uk Kim	vncipher	$out0,$out0,v29
	# little-endian builds only: byte-swap the freshly loaded input
3778e71b7053SJung-uk Kim	le?vperm	$in0,$in0,$in0,$leperm
3779e71b7053SJung-uk Kim	vncipher	$out1,$out1,v29
3780e71b7053SJung-uk Kim	vncipher	$out2,$out2,v29
3781e71b7053SJung-uk Kim	vncipher	$out3,$out3,v29
3782e71b7053SJung-uk Kim	vncipher	$out4,$out4,v29
3783e71b7053SJung-uk Kim	lvx		v25,$x10,$key_		# re-pre-load round[2]
3784e71b7053SJung-uk Kim	 vxor		$in4,$twk4,v31
3785e71b7053SJung-uk Kim
3786e71b7053SJung-uk Kim	vncipher	$out0,$out0,v30
3787e71b7053SJung-uk Kim	vncipher	$out1,$out1,v30
3788e71b7053SJung-uk Kim	vncipher	$out2,$out2,v30
3789e71b7053SJung-uk Kim	vncipher	$out3,$out3,v30
3790e71b7053SJung-uk Kim	vncipher	$out4,$out4,v30
3791e71b7053SJung-uk Kim
	# final round: third operand is the per-lane tweak xor v31
3792e71b7053SJung-uk Kim	vncipherlast	$out0,$out0,$twk0
3793e71b7053SJung-uk Kim	vncipherlast	$out1,$out1,$in1
3794e71b7053SJung-uk Kim	vncipherlast	$out2,$out2,$in2
3795e71b7053SJung-uk Kim	vncipherlast	$out3,$out3,$in3
3796e71b7053SJung-uk Kim	vncipherlast	$out4,$out4,$in4
	# reload the round counter for whatever the caller runs next
3797e71b7053SJung-uk Kim	mtctr		$rounds
3798e71b7053SJung-uk Kim	blr
3799e71b7053SJung-uk Kim        .long   	0
3800e71b7053SJung-uk Kim        .byte   	0,12,0x14,0,0,0,0,0
3801e71b7053SJung-uk Kim___
3802e71b7053SJung-uk Kim}}	}}}
3803e71b7053SJung-uk Kim
# Post-process the accumulated $code one line at a time and print it:
#   1. replace every back-quoted span with the result of eval-ing it;
#   2. while still in the constants table (until a line mentioning
#      Lconsts: is seen), turn tagged .long/.byte data into a plain
#      .byte list, converted for little-endian flavours as the tag asks;
#   3. resolve the endian markers (le, be, or a bare leading question
#      mark) that prefix endian-specific instructions.
my $consts=1;
for my $line (split("\n",$code)) {
    $line =~ s/\`([^\`]*)\`/eval($1)/geo;

    # constants table endian-specific conversion
    if ($consts && $line =~ m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
        # copy the captures out before any further match can disturb them
        my ($directive,$operands,$conv) = ($1,$2,$3);
        my @bytes=();

        # convert to endian-agnostic format: a leading 0 selects oct()
        # (which also understands 0x...), anything else is decimal;
        # each long contributes four bytes, most significant first
        for my $item (split(/,\s*/,$operands)) {
            my $value = ($item =~ /^0/) ? oct($item) : int($item);
            if ($directive eq "long") {
                push @bytes, map { ($value>>$_)&0xff } (24,16,8,0);
            } else {
                push @bytes, $value;
            }
        }

        # little-endian conversion, chosen by the trailing tag
        if ($flavour =~ /le$/o) {
            if ($conv =~ /\?inv/) {
                @bytes = map { $_^0xf } @bytes;
            } elsif ($conv =~ /\?rev/) {
                @bytes = reverse(@bytes);
            }
        }

        # emit
        print ".byte\t",join(',',map { sprintf("0x%02x",$_) } @bytes),"\n";
        next;
    }
    $consts=0 if ($line =~ m/Lconsts:/o);	# end of table

    # instructions prefixed with '?' are endian-specific and need
    # to be adjusted accordingly; only the first rule that matches
    # is applied to a given line
    if ($flavour =~ /le$/o) {		# little-endian
        $line =~ s/le\?//o
            or $line =~ s/be\?/#be#/o
            or $line =~ s/\?lvsr/lvsl/o
            or $line =~ s/\?lvsl/lvsr/o
            or $line =~ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o
            or $line =~ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o
            or $line =~ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
    } else {				# big-endian
        $line =~ s/le\?/#le#/o
            or $line =~ s/be\?//o
            or $line =~ s/\?([a-z]+)/$1/o;
    }

    print $line,"\n";
}

# buffered write errors only surface when the handle is closed
close STDOUT or die "error closing STDOUT: $!";
3857