#! /usr/bin/env perl
# Copyright 2020-2025 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# This module implements SM4 with ASIMD on aarch64
#
# Feb 2022
#

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate arm-xlate.pl next to this script or in ../../perlasm, then pipe
# everything printed to STDOUT through it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

open OUT,"| \"$^X\" $xlate $flavour \"$output\""
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Register allocation shared by all generators below.
#   @vtmp / @qtmp   v0-v3 / q0-q3 (the same registers under two names)
#   @data, @datax   v4-v7, v8-v11: two groups of four state vectors
#   $rk0,$rk1,$rka,$rkb and @vtmpx all name v12-v15
#   @sbox           v16-v31: the 256-byte S-box
#   $wtmp1/$xtmp1 are w8/x8 and $wtmp2/$xtmp2 are w9/x9
$prefix="vpsm4";
my @vtmp=map("v$_",(0..3));
my @qtmp=map("q$_",(0..3));
my @data=map("v$_",(4..7));
my @datax=map("v$_",(8..11));
my ($rk0,$rk1)=("v12","v13");
my ($rka,$rkb)=("v14","v15");
my @vtmpx=map("v$_",(12..15));
my @sbox=map("v$_",(16..31));
my ($inp,$outp,$blocks,$rks)=("x0","x1","w2","x3");
my ($tmpw,$tmp,$wtmp0,$wtmp1,$wtmp2)=("w6","x6","w7","w8","w9");
my ($xtmp1,$xtmp2)=("x8","x9");
my ($ptr,$counter)=("x10","w11");
my ($word0,$word1,$word2,$word3)=("w12","w13","w14","w15");

# rev32(dst[, src]): byte-swap every 32-bit word, but only when the code is
# NOT built for big-endian (__AARCH64EB__ undefined).  With a distinct src
# the big-endian build gets a plain mov instead; when src is omitted or
# equal to dst the swap is done in place and big-endian emits nothing.
sub rev32() {
	my $dst = shift;
	my $src = shift;

	if ($src and ("$src" ne "$dst")) {
$code.=<<___;
#ifndef __AARCH64EB__
	rev32	$dst.16b,$src.16b
#else
	mov	$dst.16b,$src.16b
#endif
___
	} else {
$code.=<<___;
#ifndef __AARCH64EB__
	rev32	$dst.16b,$dst.16b
#endif
___
	}
}

# rev32_armeb(dst[, src]): mirror image of rev32() - the byte swap is
# emitted only for big-endian builds (__AARCH64EB__ defined).
sub rev32_armeb() {
	my $dst = shift;
	my $src = shift;

	if ($src and ("$src" ne "$dst")) {
$code.=<<___;
#ifdef __AARCH64EB__
	rev32	$dst.16b,$src.16b
#else
	mov	$dst.16b,$src.16b
#endif
___
	} else {
$code.=<<___;
#ifdef __AARCH64EB__
	rev32	$dst.16b,$dst.16b
#endif
___
	}
}

# rbit(dst, src, std): when std is "_gb" emit an rbit of src into dst;
# for any other std emit a mov when src differs from dst and nothing
# when the operation would be in place.
sub rbit() {
	my $dst = shift;
	my $src = shift;
	my $std = shift;

	if ($src and ("$src" ne "$dst")) {
		if ($std eq "_gb") {
$code.=<<___;
	rbit	$dst.16b,$src.16b
___
		} else {
$code.=<<___;
	mov	$dst.16b,$src.16b
___
		}
	} else {
		if ($std eq "_gb") {
			# In-place form: name $dst for both operands, exactly as
			# rev32() does, so an omitted $src cannot produce the
			# malformed operand ".16b".
$code.=<<___;
	rbit	$dst.16b,$dst.16b
___
		}
	}
}

# transpose(dat0..dat3, vt0..vt3): 4x4 transpose of 32-bit words held in
# dat0..dat3, using vt0..vt3 as scratch.
sub transpose() {
	my ($dat0,$dat1,$dat2,$dat3,$vt0,$vt1,$vt2,$vt3) = @_;

$code.=<<___;
	zip1	$vt0.4s,$dat0.4s,$dat1.4s
	zip2	$vt1.4s,$dat0.4s,$dat1.4s
	zip1	$vt2.4s,$dat2.4s,$dat3.4s
	zip2	$vt3.4s,$dat2.4s,$dat3.4s
	zip1	$dat0.2d,$vt0.2d,$vt2.2d
	zip2	$dat1.2d,$vt0.2d,$vt2.2d
	zip1	$dat2.2d,$vt1.2d,$vt3.2d
	zip2	$dat3.2d,$vt1.2d,$vt3.2d
___
}

# sbox operation for 4 lanes of words, done in place on $dat.
# The 256-byte table is consulted as four 64-byte tbl lookups on index,
# index-64, index-128 and index-192 whose results are added together.
# Afterwards each word B becomes
#   B ^ rol(B,2) ^ rol(B,10) ^ rol(B,18) ^ rol(B,24)
# where rol(B,n) is built from "ushr 32-n" followed by "sli n".
# Clobbers @vtmp[0..2].
sub sbox() {
	my $dat = shift;

$code.=<<___;
	movi	@vtmp[0].16b,#64
	movi	@vtmp[1].16b,#128
	movi	@vtmp[2].16b,#192
	sub	@vtmp[0].16b,$dat.16b,@vtmp[0].16b
	sub	@vtmp[1].16b,$dat.16b,@vtmp[1].16b
	sub	@vtmp[2].16b,$dat.16b,@vtmp[2].16b
	tbl	$dat.16b,{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},$dat.16b
	tbl	@vtmp[0].16b,{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},@vtmp[0].16b
	tbl	@vtmp[1].16b,{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},@vtmp[1].16b
	tbl	@vtmp[2].16b,{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},@vtmp[2].16b
	add	@vtmp[0].2d,@vtmp[0].2d,@vtmp[1].2d
	add	@vtmp[2].2d,@vtmp[2].2d,$dat.2d
	add	$dat.2d,@vtmp[0].2d,@vtmp[2].2d

	ushr	@vtmp[0].4s,$dat.4s,32-2
	sli	@vtmp[0].4s,$dat.4s,2
	ushr	@vtmp[2].4s,$dat.4s,32-10
	eor	@vtmp[1].16b,@vtmp[0].16b,$dat.16b
	sli	@vtmp[2].4s,$dat.4s,10
	eor	@vtmp[1].16b,@vtmp[2].16b,@vtmp[1].16b
	ushr	@vtmp[0].4s,$dat.4s,32-18
	sli	@vtmp[0].4s,$dat.4s,18
	ushr	@vtmp[2].4s,$dat.4s,32-24
	eor	@vtmp[1].16b,@vtmp[0].16b,@vtmp[1].16b
	sli	@vtmp[2].4s,$dat.4s,24
	eor	$dat.16b,@vtmp[2].16b,@vtmp[1].16b
___
}

# sbox operation for 8 lanes of words: the same computation as sbox(),
# applied in place to both $dat and $datx with the two rotate/xor chains
# interleaved.  Clobbers @vtmp[0..3].
sub sbox_double() {
	my $dat = shift;
	my $datx = shift;

$code.=<<___;
	movi	@vtmp[3].16b,#64
	sub	@vtmp[0].16b,$dat.16b,@vtmp[3].16b
	sub	@vtmp[1].16b,@vtmp[0].16b,@vtmp[3].16b
	sub	@vtmp[2].16b,@vtmp[1].16b,@vtmp[3].16b
	tbl	$dat.16b,{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},$dat.16b
	tbl	@vtmp[0].16b,{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},@vtmp[0].16b
	tbl	@vtmp[1].16b,{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},@vtmp[1].16b
	tbl	@vtmp[2].16b,{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},@vtmp[2].16b
	add	@vtmp[1].2d,@vtmp[0].2d,@vtmp[1].2d
	add	$dat.2d,@vtmp[2].2d,$dat.2d
	add	$dat.2d,@vtmp[1].2d,$dat.2d

	sub	@vtmp[0].16b,$datx.16b,@vtmp[3].16b
	sub	@vtmp[1].16b,@vtmp[0].16b,@vtmp[3].16b
	sub	@vtmp[2].16b,@vtmp[1].16b,@vtmp[3].16b
	tbl	$datx.16b,{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},$datx.16b
	tbl	@vtmp[0].16b,{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},@vtmp[0].16b
	tbl	@vtmp[1].16b,{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},@vtmp[1].16b
	tbl	@vtmp[2].16b,{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},@vtmp[2].16b
	add	@vtmp[1].2d,@vtmp[0].2d,@vtmp[1].2d
	add	$datx.2d,@vtmp[2].2d,$datx.2d
	add	$datx.2d,@vtmp[1].2d,$datx.2d

	ushr	@vtmp[0].4s,$dat.4s,32-2
	sli	@vtmp[0].4s,$dat.4s,2
	ushr	@vtmp[2].4s,$datx.4s,32-2
	eor	@vtmp[1].16b,@vtmp[0].16b,$dat.16b
	sli	@vtmp[2].4s,$datx.4s,2

	ushr	@vtmp[0].4s,$dat.4s,32-10
	eor	@vtmp[3].16b,@vtmp[2].16b,$datx.16b
	sli	@vtmp[0].4s,$dat.4s,10
	ushr	@vtmp[2].4s,$datx.4s,32-10
	eor	@vtmp[1].16b,@vtmp[0].16b,@vtmp[1].16b
	sli	@vtmp[2].4s,$datx.4s,10

	ushr	@vtmp[0].4s,$dat.4s,32-18
	eor	@vtmp[3].16b,@vtmp[2].16b,@vtmp[3].16b
	sli	@vtmp[0].4s,$dat.4s,18
	ushr	@vtmp[2].4s,$datx.4s,32-18
	eor	@vtmp[1].16b,@vtmp[0].16b,@vtmp[1].16b
	sli	@vtmp[2].4s,$datx.4s,18

	ushr	@vtmp[0].4s,$dat.4s,32-24
	eor	@vtmp[3].16b,@vtmp[2].16b,@vtmp[3].16b
	sli	@vtmp[0].4s,$dat.4s,24
	ushr	@vtmp[2].4s,$datx.4s,32-24
	eor	$dat.16b,@vtmp[0].16b,@vtmp[1].16b
	sli	@vtmp[2].4s,$datx.4s,24
	eor	$datx.16b,@vtmp[2].16b,@vtmp[3].16b
___
}

# sbox operation for one single word held in the scalar register $word.
# The word is moved into lane 0 of @vtmp[0], looked up with the same four
# 64-byte tbl lookups as sbox(), and the rotations are done with scalar
# "ror #32-n" (i.e. rotate left by n).
# Clobbers @vtmp[0..3], $wtmp0 and $wtmp2.
sub sbox_1word () {
	my $word = shift;

$code.=<<___;
	movi	@vtmp[1].16b,#64
	movi	@vtmp[2].16b,#128
	movi	@vtmp[3].16b,#192
	mov	@vtmp[0].s[0],$word

	sub	@vtmp[1].16b,@vtmp[0].16b,@vtmp[1].16b
	sub	@vtmp[2].16b,@vtmp[0].16b,@vtmp[2].16b
	sub	@vtmp[3].16b,@vtmp[0].16b,@vtmp[3].16b

	tbl	@vtmp[0].16b,{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},@vtmp[0].16b
	tbl	@vtmp[1].16b,{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},@vtmp[1].16b
	tbl	@vtmp[2].16b,{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},@vtmp[2].16b
	tbl	@vtmp[3].16b,{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},@vtmp[3].16b

	mov	$word,@vtmp[0].s[0]
	mov	$wtmp0,@vtmp[1].s[0]
	mov	$wtmp2,@vtmp[2].s[0]
	add	$wtmp0,$word,$wtmp0
	mov	$word,@vtmp[3].s[0]
	add	$wtmp0,$wtmp0,$wtmp2
	add	$wtmp0,$wtmp0,$word

	eor	$word,$wtmp0,$wtmp0,ror #32-2
	eor	$word,$word,$wtmp0,ror #32-10
	eor	$word,$word,$wtmp0,ror #32-18
	eor	$word,$word,$wtmp0,ror #32-24
___
}

# sm4 for one block of data, in scalar registers word0/word1/word2/word3
# Emits four rounds.  Round keys are read two at a time from [$kptr],
# which is post-incremented by 8 on each load (16 bytes in total).
# sbox_1word() clobbers $wtmp0 and $wtmp2, so the first key of each pair
# is consumed before the call and only $wtmp1 stays live across it.
sub sm4_1blk () {
	my $kptr = shift;

$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	$tmpw,$word2,$word3
	eor	$wtmp2,$wtmp0,$word1
	eor	$tmpw,$tmpw,$wtmp2
___
	&sbox_1word($tmpw);
$code.=<<___;
	eor	$word0,$word0,$tmpw
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	$tmpw,$word2,$word3
	eor	$wtmp2,$word0,$wtmp1
	eor	$tmpw,$tmpw,$wtmp2
___
	&sbox_1word($tmpw);
$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	eor	$word1,$word1,$tmpw
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	$tmpw,$word0,$word1
	eor	$wtmp2,$wtmp0,$word3
	eor	$tmpw,$tmpw,$wtmp2
___
	&sbox_1word($tmpw);
$code.=<<___;
	eor	$word2,$word2,$tmpw
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	$tmpw,$word0,$word1
	eor	$wtmp2,$word2,$wtmp1
	eor	$tmpw,$tmpw,$wtmp2
___
	&sbox_1word($tmpw);
$code.=<<___;
	eor	$word3,$word3,$tmpw
___
}

# sm4 for 4-lanes of data, in neon registers data0/data1/data2/data3
# Emits four rounds; each round key is broadcast to all lanes with dup.
# $rka keeps the xor of the two words that are common to consecutive
# rounds so that the following round only folds in the freshly updated
# word.  Uses $rk0, $rk1, $rka, $wtmp0, $wtmp1 and whatever sbox() clobbers.
sub sm4_4blks () {
	my $kptr = shift;

$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	dup	$rk0.4s,$wtmp0
	dup	$rk1.4s,$wtmp1

	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	$rka.16b,@data[2].16b,@data[3].16b
	eor	$rk0.16b,@data[1].16b,$rk0.16b
	eor	$rk0.16b,$rka.16b,$rk0.16b
___
	&sbox($rk0);
$code.=<<___;
	eor	@data[0].16b,@data[0].16b,$rk0.16b

	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	$rka.16b,$rka.16b,@data[0].16b
	eor	$rk1.16b,$rka.16b,$rk1.16b
___
	&sbox($rk1);
$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	eor	@data[1].16b,@data[1].16b,$rk1.16b

	dup	$rk0.4s,$wtmp0
	dup	$rk1.4s,$wtmp1

	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	$rka.16b,@data[0].16b,@data[1].16b
	eor	$rk0.16b,@data[3].16b,$rk0.16b
	eor	$rk0.16b,$rka.16b,$rk0.16b
___
	&sbox($rk0);
$code.=<<___;
	eor	@data[2].16b,@data[2].16b,$rk0.16b

	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	$rka.16b,$rka.16b,@data[2].16b
	eor	$rk1.16b,$rka.16b,$rk1.16b
___
	&sbox($rk1);
$code.=<<___;
	eor	@data[3].16b,@data[3].16b,$rk1.16b
___
}

# sm4 for 8 lanes of data, in neon registers
# data0/data1/data2/data3 datax0/datax1/datax2/datax3
# Same four rounds as sm4_4blks(), processing @data and @datax side by
# side through sbox_double().  $rka/$rkb hold the running xor for the
# two groups; @vtmp[0] and @vtmp[1] are used as extra scratch.
sub sm4_8blks () {
	my $kptr = shift;

$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	dup	$rk0.4s,$wtmp0
	eor	$rka.16b,@data[2].16b,@data[3].16b
	eor	$rkb.16b,@datax[2].16b,@datax[3].16b
	eor	@vtmp[0].16b,@data[1].16b,$rk0.16b
	eor	@vtmp[1].16b,@datax[1].16b,$rk0.16b
	eor	$rk0.16b,$rka.16b,@vtmp[0].16b
	eor	$rk1.16b,$rkb.16b,@vtmp[1].16b
___
	&sbox_double($rk0,$rk1);
$code.=<<___;
	eor	@data[0].16b,@data[0].16b,$rk0.16b
	eor	@datax[0].16b,@datax[0].16b,$rk1.16b

	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	dup	$rk1.4s,$wtmp1
	eor	$rka.16b,$rka.16b,@data[0].16b
	eor	$rkb.16b,$rkb.16b,@datax[0].16b
	eor	$rk0.16b,$rka.16b,$rk1.16b
	eor	$rk1.16b,$rkb.16b,$rk1.16b
___
	&sbox_double($rk0,$rk1);
$code.=<<___;
	ldp	$wtmp0,$wtmp1,[$kptr],8
	eor	@data[1].16b,@data[1].16b,$rk0.16b
	eor	@datax[1].16b,@datax[1].16b,$rk1.16b

	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	dup	$rk0.4s,$wtmp0
	eor	$rka.16b,@data[0].16b,@data[1].16b
	eor	$rkb.16b,@datax[0].16b,@datax[1].16b
	eor	@vtmp[0].16b,@data[3].16b,$rk0.16b
	eor	@vtmp[1].16b,@datax[3].16b,$rk0.16b
	eor	$rk0.16b,$rka.16b,@vtmp[0].16b
	eor	$rk1.16b,$rkb.16b,@vtmp[1].16b
___
	&sbox_double($rk0,$rk1);
$code.=<<___;
	eor	@data[2].16b,@data[2].16b,$rk0.16b
	eor	@datax[2].16b,@datax[2].16b,$rk1.16b

	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	dup	$rk1.4s,$wtmp1
	eor	$rka.16b,$rka.16b,@data[2].16b
	eor	$rkb.16b,$rkb.16b,@datax[2].16b
	eor	$rk0.16b,$rka.16b,$rk1.16b
	eor	$rk1.16b,$rkb.16b,$rk1.16b
___
	&sbox_double($rk0,$rk1);
$code.=<<___;
	eor	@data[3].16b,@data[3].16b,$rk0.16b
	eor	@datax[3].16b,@datax[3].16b,$rk1.16b
___
}

# encrypt_1blk_norev(dat): run the block held in vector $dat through
# 8 iterations of sm4_1blk() (4 rounds each), reading round keys from
# $rks, and write the four result words back to $dat in reverse word
# order.  No final byte swap is applied.  Uses local label "10" and
# clobbers $ptr, $counter and $word0..$word3.
sub encrypt_1blk_norev() {
	my $dat = shift;

$code.=<<___;
	mov	$ptr,$rks
	mov	$counter,#8
	mov	$word0,$dat.s[0]
	mov	$word1,$dat.s[1]
	mov	$word2,$dat.s[2]
	mov	$word3,$dat.s[3]
10:
___
	&sm4_1blk($ptr);
$code.=<<___;
	subs	$counter,$counter,#1
	b.ne	10b
	mov	$dat.s[0],$word3
	mov	$dat.s[1],$word2
	mov	$dat.s[2],$word1
	mov	$dat.s[3],$word0
___
}

# encrypt_1blk(dat): encrypt_1blk_norev() followed by an in-place rev32().
sub encrypt_1blk() {
	my $dat = shift;

	&encrypt_1blk_norev($dat);
	&rev32($dat,$dat);
}

# encrypt_4blks(): 8 iterations of sm4_4blks() over @data[0..3] with round
# keys from $rks.  The results are delivered through rev32() into
# @vtmp[3], @vtmp[2], @vtmp[1], @vtmp[0] (from @data[0..3] respectively).
sub encrypt_4blks() {
$code.=<<___;
	mov	$ptr,$rks
	mov	$counter,#8
10:
___
	&sm4_4blks($ptr);
$code.=<<___;
	subs	$counter,$counter,#1
	b.ne	10b
___
	&rev32(@vtmp[3],@data[0]);
	&rev32(@vtmp[2],@data[1]);
	&rev32(@vtmp[1],@data[2]);
	&rev32(@vtmp[0],@data[3]);
}

# encrypt_8blks(): 8 iterations of sm4_8blks() over @data and @datax.
# The @data results land in @vtmp[3..0] as in encrypt_4blks(); the @datax
# results are then written into @data[3], @data[2], @data[1], @data[0]
# (from @datax[0..3] respectively).  The order of the rev32() calls
# matters because @data is overwritten only after it has been copied out.
sub encrypt_8blks() {
$code.=<<___;
	mov	$ptr,$rks
	mov	$counter,#8
10:
___
	&sm4_8blks($ptr);
$code.=<<___;
	subs	$counter,$counter,#1
	b.ne	10b
___
	&rev32(@vtmp[3],@data[0]);
	&rev32(@vtmp[2],@data[1]);
	&rev32(@vtmp[1],@data[2]);
	&rev32(@vtmp[0],@data[3]);
	&rev32(@data[3],@datax[0]);
	&rev32(@data[2],@datax[1]);
	&rev32(@data[1],@datax[2]);
	&rev32(@data[0],@datax[3]);
}

# load_sbox(): load the 256 bytes at .Lsbox into @sbox[0..15] (v16-v31).
# Takes no arguments; clobbers $ptr.
sub load_sbox () {
$code.=<<___;
	adrp	$ptr,.Lsbox
	add	$ptr,$ptr,#:lo12:.Lsbox
	ld1	{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},[$ptr],#64
	ld1	{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},[$ptr],#64
	ld1	{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},[$ptr],#64
	ld1	{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},[$ptr]
___
}


# mov_reg_to_vec(src0, src1, desv): place the 64-bit registers src0 and
# src1 into the low and high halves of vector desv, then byte-swap each
# 32-bit word on big-endian builds only.
sub mov_reg_to_vec() {
	my $src0 = shift;
	my $src1 = shift;
	my $desv = shift;
$code.=<<___;
	mov	$desv.d[0],$src0
	mov	$desv.d[1],$src1
___
	&rev32_armeb($desv,$desv);
}

# mov_vec_to_reg(srcv, des0, des1): copy the low and high 64-bit halves
# of vector srcv into the registers des0 and des1.
sub mov_vec_to_reg() {
	my $srcv = shift;
	my $des0 = shift;
	my $des1 = shift;
$code.=<<___;
	mov	$des0,$srcv.d[0]
	mov	$des1,$srcv.d[1]
___
}

# compute_tweak(src0, src1, des0, des1): shift the 128-bit value
# src1:src0 left by one bit into des1:des0 and xor 0x87 into the low word
# when the bit shifted out of src1 was set.
#   - $xtmp2 receives src1 rotated by 32, so $wtmp2 (its low half) holds
#     the upper 32 bits of src1 and "asr#31" turns its top bit into a mask;
#   - the result of the "and" is written to $wtmp1, the low half of
#     $xtmp1, which is then used as a 64-bit operand of the final eor.
# Clobbers $wtmp0, $xtmp1 and $xtmp2.
sub compute_tweak() {
	my $src0 = shift;
	my $src1 = shift;
	my $des0 = shift;
	my $des1 = shift;
$code.=<<___;
	mov	$wtmp0,0x87
	extr	$xtmp2,$src1,$src1,#32
	extr	$des1,$src1,$src0,#63
	and	$wtmp1,$wtmp0,$wtmp2,asr#31
	eor	$des0,$xtmp1,$src0,lsl#1
___
}

# compute_tweak_vec(src, des, std): vector form of the same update.
# Every byte is shifted left by one; the top bit of the preceding byte
# (the last byte wraps around to the first) is isolated with ext/ushr and
# multiplied by the matching byte of .Lxts_magic (0x87 for byte 0, 0x01
# for the others) before being xored in.  rbit() is applied to the input
# and to the result, which has an effect only when std is "_gb".
# Clobbers @vtmp[0..2] (the magic constant is loaded through @qtmp[0],
# the same register as @vtmp[0]) and $ptr.
sub compute_tweak_vec() {
	my $src = shift;
	my $des = shift;
	my $std = shift;
	&rbit(@vtmp[2],$src,$std);
$code.=<<___;
	adrp	$ptr,.Lxts_magic
	ldr	@qtmp[0], [$ptr, #:lo12:.Lxts_magic]
	shl	$des.16b, @vtmp[2].16b, #1
	ext	@vtmp[1].16b, @vtmp[2].16b, @vtmp[2].16b,#15
	ushr	@vtmp[1].16b, @vtmp[1].16b, #7
	mul	@vtmp[1].16b, @vtmp[1].16b, @vtmp[0].16b
	eor	$des.16b, $des.16b, @vtmp[1].16b
___
	&rbit($des,$des,$std);
}

# Start of the generated assembly: file prologue and read-only constants
# (.Lsbox, .Lck, .Lfk, .Lshuffles, .Lxts_magic).
$code=<<___;
#include "arm_arch.h"
.arch	armv8-a
.text

.rodata
.type	_${prefix}_consts,%object
.align	7
_${prefix}_consts:
.Lsbox:
	.byte 0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05
	.byte 0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99
	.byte 0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62
	.byte 0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6
	.byte 0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8
	.byte 0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35
	.byte 0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87
	.byte 0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E
	.byte 0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1
	.byte 0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3
	.byte 0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F
	.byte 0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51
	.byte 0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8
	.byte 0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0
	.byte 0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84
	.byte 0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48
.Lck:
	.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
	.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
	.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
	.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
	.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
	.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
	.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
	.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
.Lfk:
	.quad 0x56aa3350a3b1bac6,0xb27022dc677d9197
.Lshuffles:
	.quad 0x0B0A090807060504,0x030201000F0E0D0C
.Lxts_magic:
	.quad 0x0101010101010187,0x0101010101010101

.size	_${prefix}_consts,.-_${prefix}_consts

.previous

___

{{{
my ($key,$keys,$enc)=("x0","x1","w2");
my ($pointer,$schedules,$wtmp,$roundkey)=("x5","x6","w7","w8");
my ($vkey,$vfk,$vmap)=("v5","v6","v7");
$code.=<<___;
.type	_vpsm4_set_key,%function
.align	4
_vpsm4_set_key:
	AARCH64_VALID_CALL_TARGET
	ld1	{$vkey.4s},[$key]
___
	&load_sbox();
	&rev32($vkey,$vkey);
$code.=<<___;
	adrp	$pointer,.Lshuffles
	add	$pointer,$pointer,#:lo12:.Lshuffles
	ld1	{$vmap.2d},[$pointer]
	adrp	$pointer,.Lfk
	add	$pointer,$pointer,#:lo12:.Lfk
	ld1	{$vfk.2d},[$pointer]
	eor	$vkey.16b,$vkey.16b,$vfk.16b
	mov	$schedules,#32
	adrp	$pointer,.Lck
	add	$pointer,$pointer,#:lo12:.Lck
	movi	@vtmp[0].16b,#64
	cbnz	$enc,1f
	add	$keys,$keys,124
1:
	mov	$wtmp,$vkey.s[1]
	ldr	$roundkey,[$pointer],#4
	eor	$roundkey,$roundkey,$wtmp
	mov	$wtmp,$vkey.s[2]
	eor	$roundkey,$roundkey,$wtmp
	mov	$wtmp,$vkey.s[3]
	eor	$roundkey,$roundkey,$wtmp
	// sbox lookup
	mov	@data[0].s[0],$roundkey
	tbl	@vtmp[1].16b,{@sbox[0].16b,@sbox[1].16b,@sbox[2].16b,@sbox[3].16b},@data[0].16b
	sub	@data[0].16b,@data[0].16b,@vtmp[0].16b
	tbx	@vtmp[1].16b,{@sbox[4].16b,@sbox[5].16b,@sbox[6].16b,@sbox[7].16b},@data[0].16b
	sub	@data[0].16b,@data[0].16b,@vtmp[0].16b
	tbx	@vtmp[1].16b,{@sbox[8].16b,@sbox[9].16b,@sbox[10].16b,@sbox[11].16b},@data[0].16b
	sub	@data[0].16b,@data[0].16b,@vtmp[0].16b
	tbx	@vtmp[1].16b,{@sbox[12].16b,@sbox[13].16b,@sbox[14].16b,@sbox[15].16b},@data[0].16b
	mov	$wtmp,@vtmp[1].s[0]
	eor	$roundkey,$wtmp,$wtmp,ror #19
	eor	$roundkey,$roundkey,$wtmp,ror #9
	mov
$wtmp,$vkey.s[0] 635*e7be843bSPierre Pronchery eor $roundkey,$roundkey,$wtmp 636*e7be843bSPierre Pronchery mov $vkey.s[0],$roundkey 637*e7be843bSPierre Pronchery cbz $enc,2f 638*e7be843bSPierre Pronchery str $roundkey,[$keys],#4 639*e7be843bSPierre Pronchery b 3f 640*e7be843bSPierre Pronchery2: 641*e7be843bSPierre Pronchery str $roundkey,[$keys],#-4 642*e7be843bSPierre Pronchery3: 643*e7be843bSPierre Pronchery tbl $vkey.16b,{$vkey.16b},$vmap.16b 644*e7be843bSPierre Pronchery subs $schedules,$schedules,#1 645*e7be843bSPierre Pronchery b.ne 1b 646*e7be843bSPierre Pronchery ret 647*e7be843bSPierre Pronchery.size _vpsm4_set_key,.-_vpsm4_set_key 648*e7be843bSPierre Pronchery___ 649*e7be843bSPierre Pronchery}}} 650*e7be843bSPierre Pronchery 651*e7be843bSPierre Pronchery 652*e7be843bSPierre Pronchery{{{ 653*e7be843bSPierre Pronchery$code.=<<___; 654*e7be843bSPierre Pronchery.type _vpsm4_enc_4blks,%function 655*e7be843bSPierre Pronchery.align 4 656*e7be843bSPierre Pronchery_vpsm4_enc_4blks: 657*e7be843bSPierre Pronchery AARCH64_VALID_CALL_TARGET 658*e7be843bSPierre Pronchery___ 659*e7be843bSPierre Pronchery &encrypt_4blks(); 660*e7be843bSPierre Pronchery$code.=<<___; 661*e7be843bSPierre Pronchery ret 662*e7be843bSPierre Pronchery.size _vpsm4_enc_4blks,.-_vpsm4_enc_4blks 663*e7be843bSPierre Pronchery___ 664*e7be843bSPierre Pronchery}}} 665*e7be843bSPierre Pronchery 666*e7be843bSPierre Pronchery{{{ 667*e7be843bSPierre Pronchery$code.=<<___; 668*e7be843bSPierre Pronchery.type _vpsm4_enc_8blks,%function 669*e7be843bSPierre Pronchery.align 4 670*e7be843bSPierre Pronchery_vpsm4_enc_8blks: 671*e7be843bSPierre Pronchery AARCH64_VALID_CALL_TARGET 672*e7be843bSPierre Pronchery___ 673*e7be843bSPierre Pronchery &encrypt_8blks(); 674*e7be843bSPierre Pronchery$code.=<<___; 675*e7be843bSPierre Pronchery ret 676*e7be843bSPierre Pronchery.size _vpsm4_enc_8blks,.-_vpsm4_enc_8blks 677*e7be843bSPierre Pronchery___ 678*e7be843bSPierre Pronchery}}} 679*e7be843bSPierre Pronchery 

# ${prefix}_set_encrypt_key: public entry point.  Saves x29/x30, sets w2=1
# and calls _vpsm4_set_key with x0/x1 passed through unchanged.
{{{
my ($key,$keys)=("x0","x1");
$code.=<<___;
.globl	${prefix}_set_encrypt_key
.type	${prefix}_set_encrypt_key,%function
.align	5
${prefix}_set_encrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	mov	w2,1
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
___
}}}

# ${prefix}_set_decrypt_key: identical to the encrypt variant except that
# it passes w2=0 to _vpsm4_set_key.
{{{
my ($key,$keys)=("x0","x1");
$code.=<<___;
.globl	${prefix}_set_decrypt_key
.type	${prefix}_set_decrypt_key,%function
.align	5
${prefix}_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	mov	w2,0
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
___
}}}

# gen_block($dir): emits the single-block routine ${prefix}_${dir}crypt.
#   x0 ($inp)  - pointer to the 16-byte input block
#   x1 ($outp) - pointer to the 16-byte output block
#   x2 ($rk)   - round-key pointer, copied into $rks for encrypt_1blk()
# The "en" and "de" instantiations emit the same instruction sequence;
# only the symbol name differs.
# The sub is invoked with a leading '&', which bypasses its empty prototype.
{{{
sub gen_block () {
	my $dir = shift;
	my ($inp,$outp,$rk)=map("x$_",(0..2));

$code.=<<___;
.globl	${prefix}_${dir}crypt
.type	${prefix}_${dir}crypt,%function
.align	5
${prefix}_${dir}crypt:
	AARCH64_VALID_CALL_TARGET
	ld1	{@data[0].4s},[$inp]
___
	&load_sbox();
	&rev32(@data[0],@data[0]);
$code.=<<___;
	mov	$rks,$rk
___
	&encrypt_1blk(@data[0]);
$code.=<<___;
	st1	{@data[0].4s},[$outp]
	ret
.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}

# ${prefix}_ecb_encrypt
#   x0 ($inp), x1 ($outp) - input / output pointers
#   x2                    - length in bytes, shifted right by 4 to a block
#                           count and then read as $blocks (w2)
# Saves d8-d15 and x29/x30 in an 80-byte frame, then processes 8 blocks
# per iteration, then one group of 4, then a tail of 1, 2 or 3 blocks.
# The 2- and 3-block tails load individual lanes with ld4 {...}[n], run
# the 4-block routine and store back only the lanes that were loaded.
{{{
# w4 is named here but never referenced by the code generated below.
my ($enc) = ("w4");

$code.=<<___;
.globl	${prefix}_ecb_encrypt
.type	${prefix}_ecb_encrypt,%function
.align	5
${prefix}_ecb_encrypt:
	AARCH64_SIGN_LINK_REGISTER
	// convert length into blocks
	lsr	x2,x2,4
	stp	d8,d9,[sp,#-80]!
	stp	d10,d11,[sp,#16]
	stp	d12,d13,[sp,#32]
	stp	d14,d15,[sp,#48]
	stp	x29,x30,[sp,#64]
___
	&load_sbox();
$code.=<<___;
.Lecb_8_blocks_process:
	cmp	$blocks,#8
	b.lt	.Lecb_4_blocks_process
	ld4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
	ld4	{@datax[0].4s,$datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
	&rev32(@datax[0],@datax[0]);
	&rev32(@datax[1],@datax[1]);
	&rev32(@datax[2],@datax[2]);
	&rev32(@datax[3],@datax[3]);
$code.=<<___;
	bl	_vpsm4_enc_8blks
	st4	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	st4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64
	subs	$blocks,$blocks,#8
	b.gt	.Lecb_8_blocks_process
	b	100f
.Lecb_4_blocks_process:
	cmp	$blocks,#4
	b.lt	1f
	ld4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	st4	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	sub	$blocks,$blocks,#4
1:
	// process last block
	cmp	$blocks,#1
	b.lt	100f
	b.gt	1f
	ld1	{@data[0].4s},[$inp]
___
	&rev32(@data[0],@data[0]);
	&encrypt_1blk(@data[0]);
$code.=<<___;
	st1	{@data[0].4s},[$outp]
	b	100f
1:	// process last 2 blocks
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[0],[$inp],#16
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[1],[$inp],#16
	cmp	$blocks,#2
	b.gt	1f
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	st4	{@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16
	st4	{@vtmp[0].s-@vtmp[3].s}[1],[$outp]
	b	100f
1:	// process last 3 blocks
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[2],[$inp],#16
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	st4	{@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16
	st4	{@vtmp[0].s-@vtmp[3].s}[1],[$outp],#16
	st4	{@vtmp[0].s-@vtmp[3].s}[2],[$outp]
100:
	ldp	d10,d11,[sp,#16]
	ldp	d12,d13,[sp,#32]
	ldp	d14,d15,[sp,#48]
	ldp	x29,x30,[sp,#64]
	ldp	d8,d9,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt
___
}}}

# ${prefix}_cbc_encrypt
#   x0 ($inp), x1 ($outp) - input / output pointers
#   x2 ($len)             - length in bytes, shifted right by 4 to a block
#                           count and then read as $blocks (w2)
#   x4 ($ivp)             - IV pointer; the updated IV is written back
#   w5 ($enc)             - zero selects the .Ldec path
#
# Encrypt path: blocks are chained one after another (4 per unrolled
# iteration, then one at a time), using $ivec0 (v3) as the running IV.
# This path returns with a plain ret and never touches the stack.
#
# Decrypt path: saves d8-d15 and x29/x30, runs 8 or 4 blocks at a time
# through _vpsm4_enc_8blks / _vpsm4_enc_4blks, then re-reads the input
# with ld1 so that each output block can be XORed with the preceding
# input block ($ivec1 for the first one).  The last input block read is
# stored to [$ivp].  $ivec1 shares v15 with @vtmpx[3].
{{{
my ($len,$ivp,$enc)=("x2","x4","w5");
my $ivec0=("v3");
my $ivec1=("v15");

$code.=<<___;
.globl	${prefix}_cbc_encrypt
.type	${prefix}_cbc_encrypt,%function
.align	5
${prefix}_cbc_encrypt:
	AARCH64_VALID_CALL_TARGET
	lsr	$len,$len,4
___
	&load_sbox();
$code.=<<___;
	cbz	$enc,.Ldec
	ld1	{$ivec0.4s},[$ivp]
.Lcbc_4_blocks_enc:
	cmp	$blocks,#4
	b.lt	1f
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
	eor	@data[0].16b,@data[0].16b,$ivec0.16b
___
	&rev32(@data[1],@data[1]);
	&rev32(@data[0],@data[0]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
	&encrypt_1blk_norev(@data[0]);
$code.=<<___;
	eor	@data[1].16b,@data[1].16b,@data[0].16b
___
	&encrypt_1blk_norev(@data[1]);
	&rev32(@data[0],@data[0]);

$code.=<<___;
	eor	@data[2].16b,@data[2].16b,@data[1].16b
___
	&encrypt_1blk_norev(@data[2]);
	&rev32(@data[1],@data[1]);
$code.=<<___;
	eor	@data[3].16b,@data[3].16b,@data[2].16b
___
	&encrypt_1blk_norev(@data[3]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	orr	$ivec0.16b,@data[3].16b,@data[3].16b
	st1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64
	subs	$blocks,$blocks,#4
	b.ne	.Lcbc_4_blocks_enc
	b	2f
1:
	subs	$blocks,$blocks,#1
	b.lt	2f
	ld1	{@data[0].4s},[$inp],#16
	eor	$ivec0.16b,$ivec0.16b,@data[0].16b
___
	&rev32($ivec0,$ivec0);
	&encrypt_1blk($ivec0);
$code.=<<___;
	st1	{$ivec0.4s},[$outp],#16
	b	1b
2:
	// save back IV
	st1	{$ivec0.4s},[$ivp]
	ret

.Ldec:
	// decryption mode starts
	AARCH64_SIGN_LINK_REGISTER
	stp	d8,d9,[sp,#-80]!
	stp	d10,d11,[sp,#16]
	stp	d12,d13,[sp,#32]
	stp	d14,d15,[sp,#48]
	stp	x29,x30,[sp,#64]
.Lcbc_8_blocks_dec:
	cmp	$blocks,#8
	b.lt	1f
	ld4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp]
	add	$ptr,$inp,#64
	ld4	{@datax[0].4s,@datax[1].4s,@datax[2].4s,@datax[3].4s},[$ptr]
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],$data[3]);
	&rev32(@datax[0],@datax[0]);
	&rev32(@datax[1],@datax[1]);
	&rev32(@datax[2],@datax[2]);
	&rev32(@datax[3],$datax[3]);
$code.=<<___;
	bl	_vpsm4_enc_8blks
___
	&transpose(@vtmp,@datax);
	&transpose(@data,@datax);
$code.=<<___;
	ld1	{$ivec1.4s},[$ivp]
	ld1	{@datax[0].4s,@datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64
	// note ivec1 and vtmpx[3] are reusing the same register
	// care needs to be taken to avoid conflict
	eor	@vtmp[0].16b,@vtmp[0].16b,$ivec1.16b
	ld1	{@vtmpx[0].4s,@vtmpx[1].4s,@vtmpx[2].4s,@vtmpx[3].4s},[$inp],#64
	eor	@vtmp[1].16b,@vtmp[1].16b,@datax[0].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@datax[1].16b
	eor	@vtmp[3].16b,$vtmp[3].16b,@datax[2].16b
	// save back IV
	st1	{$vtmpx[3].4s}, [$ivp]
	eor	@data[0].16b,@data[0].16b,$datax[3].16b
	eor	@data[1].16b,@data[1].16b,@vtmpx[0].16b
	eor	@data[2].16b,@data[2].16b,@vtmpx[1].16b
	eor	@data[3].16b,$data[3].16b,@vtmpx[2].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	st1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64
	subs	$blocks,$blocks,#8
	b.gt	.Lcbc_8_blocks_dec
	b.eq	100f
1:
	ld1	{$ivec1.4s},[$ivp]
.Lcbc_4_blocks_dec:
	cmp	$blocks,#4
	b.lt	1f
	ld4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp]
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],$data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
___
	&transpose(@vtmp,@datax);
$code.=<<___;
	eor	@vtmp[0].16b,@vtmp[0].16b,$ivec1.16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@data[0].16b
	orr	$ivec1.16b,@data[3].16b,@data[3].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@data[1].16b
	eor	@vtmp[3].16b,$vtmp[3].16b,@data[2].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	subs	$blocks,$blocks,#4
	b.gt	.Lcbc_4_blocks_dec
	// save back IV
	st1	{@data[3].4s}, [$ivp]
	b	100f
1:	// last block
	subs	$blocks,$blocks,#1
	b.lt	100f
	b.gt	1f
	ld1	{@data[0].4s},[$inp],#16
	// save back IV
	st1	{$data[0].4s}, [$ivp]
___
	&rev32(@datax[0],@data[0]);
	&encrypt_1blk(@datax[0]);
$code.=<<___;
	eor	@datax[0].16b,@datax[0].16b,$ivec1.16b
	st1	{@datax[0].4s},[$outp],#16
	b	100f
1:	// last two blocks
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[0],[$inp]
	add	$ptr,$inp,#16
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[1],[$ptr],#16
	subs	$blocks,$blocks,1
	b.gt	1f
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	ld1	{@data[0].4s,@data[1].4s},[$inp],#32
___
	&transpose(@vtmp,@datax);
$code.=<<___;
	eor	@vtmp[0].16b,@vtmp[0].16b,$ivec1.16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@data[0].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s},[$outp],#32
	// save back IV
	st1	{@data[1].4s}, [$ivp]
	b	100f
1:	// last 3 blocks
	ld4	{@data[0].s,@data[1].s,@data[2].s,@data[3].s}[2],[$ptr]
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
$code.=<<___;
	bl	_vpsm4_enc_4blks
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s},[$inp],#48
___
	&transpose(@vtmp,@datax);
$code.=<<___;
	eor	@vtmp[0].16b,@vtmp[0].16b,$ivec1.16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@data[0].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@data[1].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s},[$outp],#48
	// save back IV
	st1	{@data[2].4s}, [$ivp]
100:
	ldp	d10,d11,[sp,#16]
	ldp	d12,d13,[sp,#32]
	ldp	d14,d15,[sp,#48]
	ldp	x29,x30,[sp,#64]
	ldp	d8,d9,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
___
}}}

# ${prefix}_ctr32_encrypt_blocks
#   x0 ($inp), x1 ($outp) - input / output pointers
#   w2 ($blocks)          - number of 16-byte blocks
#   x4 ($ivp)             - pointer to the 16-byte counter block (read only;
#                           nothing is stored back through it)
#
# A single block is handled on a fast path that skips the register save.
# Otherwise the counter block is split into three constant words
# ($word0..$word2) and a 32-bit counter ($ctr, w5) that is incremented
# once per block.  Counter blocks are built directly in the word-sliced
# layout (dup for the constant words, per-lane mov for the counter), run
# through the 4- or 8-block routine and XORed with input loaded by ld4.
{{{
my ($ivp)=("x4");
my ($ctr)=("w5");
my $ivec=("v3");

$code.=<<___;
.globl	${prefix}_ctr32_encrypt_blocks
.type	${prefix}_ctr32_encrypt_blocks,%function
.align	5
${prefix}_ctr32_encrypt_blocks:
	AARCH64_VALID_CALL_TARGET
	ld1	{$ivec.4s},[$ivp]
___
	&rev32($ivec,$ivec);
	&load_sbox();
$code.=<<___;
	cmp	$blocks,#1
	b.ne	1f
	// fast processing for one single block without
	// context saving overhead
___
	&encrypt_1blk($ivec);
$code.=<<___;
	ld1	{@data[0].4s},[$inp]
	eor	@data[0].16b,@data[0].16b,$ivec.16b
	st1	{@data[0].4s},[$outp]
	ret
1:
	AARCH64_SIGN_LINK_REGISTER
	stp	d8,d9,[sp,#-80]!
	stp	d10,d11,[sp,#16]
	stp	d12,d13,[sp,#32]
	stp	d14,d15,[sp,#48]
	stp	x29,x30,[sp,#64]
	mov	$word0,$ivec.s[0]
	mov	$word1,$ivec.s[1]
	mov	$word2,$ivec.s[2]
	mov	$ctr,$ivec.s[3]
.Lctr32_4_blocks_process:
	cmp	$blocks,#4
	b.lt	1f
	dup	@data[0].4s,$word0
	dup	@data[1].4s,$word1
	dup	@data[2].4s,$word2
	mov	@data[3].s[0],$ctr
	add	$ctr,$ctr,#1
	mov	$data[3].s[1],$ctr
	add	$ctr,$ctr,#1
	mov	@data[3].s[2],$ctr
	add	$ctr,$ctr,#1
	mov	@data[3].s[3],$ctr
	add	$ctr,$ctr,#1
	cmp	$blocks,#8
	b.ge	.Lctr32_8_blocks_process
	bl	_vpsm4_enc_4blks
	ld4	{@vtmpx[0].4s,@vtmpx[1].4s,@vtmpx[2].4s,@vtmpx[3].4s},[$inp],#64
	eor	@vtmp[0].16b,@vtmp[0].16b,@vtmpx[0].16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@vtmpx[1].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@vtmpx[2].16b
	eor	@vtmp[3].16b,@vtmp[3].16b,@vtmpx[3].16b
	st4	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	subs	$blocks,$blocks,#4
	b.ne	.Lctr32_4_blocks_process
	b	100f
.Lctr32_8_blocks_process:
	dup	@datax[0].4s,$word0
	dup	@datax[1].4s,$word1
	dup	@datax[2].4s,$word2
	mov	@datax[3].s[0],$ctr
	add	$ctr,$ctr,#1
	mov	$datax[3].s[1],$ctr
	add	$ctr,$ctr,#1
	mov	@datax[3].s[2],$ctr
	add	$ctr,$ctr,#1
	mov	@datax[3].s[3],$ctr
	add	$ctr,$ctr,#1
	bl	_vpsm4_enc_8blks
	ld4	{@vtmpx[0].4s,@vtmpx[1].4s,@vtmpx[2].4s,@vtmpx[3].4s},[$inp],#64
	ld4	{@datax[0].4s,@datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64
	eor	@vtmp[0].16b,@vtmp[0].16b,@vtmpx[0].16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@vtmpx[1].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@vtmpx[2].16b
	eor	@vtmp[3].16b,@vtmp[3].16b,@vtmpx[3].16b
	eor	@data[0].16b,@data[0].16b,@datax[0].16b
	eor	@data[1].16b,@data[1].16b,@datax[1].16b
	eor	@data[2].16b,@data[2].16b,@datax[2].16b
	eor	@data[3].16b,@data[3].16b,@datax[3].16b
	st4	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	st4	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64
	subs	$blocks,$blocks,#8
	b.ne	.Lctr32_4_blocks_process
	b	100f
1:	// last block processing
	subs	$blocks,$blocks,#1
	b.lt	100f
	b.gt	1f
	mov	$ivec.s[0],$word0
	mov	$ivec.s[1],$word1
	mov	$ivec.s[2],$word2
	mov	$ivec.s[3],$ctr
___
	&encrypt_1blk($ivec);
$code.=<<___;
	ld1	{@data[0].4s},[$inp]
	eor	@data[0].16b,@data[0].16b,$ivec.16b
	st1	{@data[0].4s},[$outp]
	b	100f
1:	// last 2 blocks processing
	dup	@data[0].4s,$word0
	dup	@data[1].4s,$word1
	dup	@data[2].4s,$word2
	mov	@data[3].s[0],$ctr
	add	$ctr,$ctr,#1
	mov	@data[3].s[1],$ctr
	subs	$blocks,$blocks,#1
	b.ne	1f
	bl	_vpsm4_enc_4blks
	ld4	{@vtmpx[0].s,@vtmpx[1].s,@vtmpx[2].s,@vtmpx[3].s}[0],[$inp],#16
	ld4	{@vtmpx[0].s,@vtmpx[1].s,@vtmpx[2].s,@vtmpx[3].s}[1],[$inp],#16
	eor	@vtmp[0].16b,@vtmp[0].16b,@vtmpx[0].16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@vtmpx[1].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@vtmpx[2].16b
	eor	@vtmp[3].16b,@vtmp[3].16b,@vtmpx[3].16b
	st4	{@vtmp[0].s,@vtmp[1].s,@vtmp[2].s,@vtmp[3].s}[0],[$outp],#16
	st4	{@vtmp[0].s,@vtmp[1].s,@vtmp[2].s,@vtmp[3].s}[1],[$outp],#16
	b	100f
1:	// last 3 blocks processing
	add	$ctr,$ctr,#1
	mov	@data[3].s[2],$ctr
	bl	_vpsm4_enc_4blks
	ld4	{@vtmpx[0].s,@vtmpx[1].s,@vtmpx[2].s,@vtmpx[3].s}[0],[$inp],#16
	ld4	{@vtmpx[0].s,@vtmpx[1].s,@vtmpx[2].s,@vtmpx[3].s}[1],[$inp],#16
	ld4	{@vtmpx[0].s,@vtmpx[1].s,@vtmpx[2].s,@vtmpx[3].s}[2],[$inp],#16
	eor	@vtmp[0].16b,@vtmp[0].16b,@vtmpx[0].16b
	eor	@vtmp[1].16b,@vtmp[1].16b,@vtmpx[1].16b
	eor	@vtmp[2].16b,@vtmp[2].16b,@vtmpx[2].16b
	eor	@vtmp[3].16b,@vtmp[3].16b,@vtmpx[3].16b
	st4	{@vtmp[0].s,@vtmp[1].s,@vtmp[2].s,@vtmp[3].s}[0],[$outp],#16
	st4	{@vtmp[0].s,@vtmp[1].s,@vtmp[2].s,@vtmp[3].s}[1],[$outp],#16
	st4	{@vtmp[0].s,@vtmp[1].s,@vtmp[2].s,@vtmp[3].s}[2],[$outp],#16
100:
	ldp	d10,d11,[sp,#16]
	ldp	d12,d13,[sp,#32]
	ldp	d14,d15,[sp,#48]
	ldp	x29,x30,[sp,#64]
	ldp	d8,d9,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
___
}}}

# Register assignments for the XTS generator that follows.
# Note that $rks1 and $lastBlk both name x26, and $blocks and $len both
# name x2.
{{{
my ($blocks,$len)=("x2","x2");
my $ivp=("x5");
my @twx=map("x$_",(12..27));
my ($rks1,$rks2)=("x26","x27");
my $lastBlk=("x26");
my $enc=("w28");
my $remain=("x29");

my @tweak=@datax;

# gen_xts_cipher($std)
#
# Appends to $code the assembly for one function named
# ${prefix}_xts_encrypt${std}.  $std is used as the suffix of the symbol and
# of every named label, and is forwarded to the rbit() and
# compute_tweak_vec() helpers.  This file instantiates it twice, with "_gb"
# and with "".
#
# Registers the generated code reads on entry:
#   x0 ($inp)   source pointer, advanced by the post-indexed loads
#   x1 ($outp)  destination pointer, advanced by the post-indexed stores
#   x2 ($len)   length in bytes; shifted right by 4 into a block count in
#               the same register ($blocks), low 4 bits kept in $remain
#   x3, x4      copied to $rks1 / $rks2; $rks2 is active while the block
#               loaded from $ivp is run through encrypt_1blk, $rks1 for
#               everything after that
#   x5 ($ivp)   16-byte initial tweak; the buffer is also overwritten with
#               the last tweak used, which the tail handling reloads
#   w6          copied to $enc; per the comment at .check_dec,
#               1 = encryption, 0 = decryption
#
# $rks1/$rks2 (x26/x27) are the same registers as @twx[14]/@twx[15], and
# $lastBlk is x26 as well, so the order in which those values are consumed
# below matters.
#
# NOTE(review): @twx covers x12..x27 and therefore includes x18, which some
# platforms reserve.  It is saved and restored here, but confirm that using
# it is acceptable on every targeted platform.
sub gen_xts_cipher() {
	my $std = shift;
$code.=<<___;
.globl	${prefix}_xts_encrypt${std}
.type	${prefix}_xts_encrypt${std},%function
.align	5
${prefix}_xts_encrypt${std}:
	AARCH64_SIGN_LINK_REGISTER
	stp	x15, x16, [sp, #-0x10]!
	stp	x17, x18, [sp, #-0x10]!
	stp	x19, x20, [sp, #-0x10]!
	stp	x21, x22, [sp, #-0x10]!
	stp	x23, x24, [sp, #-0x10]!
	stp	x25, x26, [sp, #-0x10]!
	stp	x27, x28, [sp, #-0x10]!
	stp	x29, x30, [sp, #-0x10]!
	stp	d8, d9, [sp, #-0x10]!
	stp	d10, d11, [sp, #-0x10]!
	stp	d12, d13, [sp, #-0x10]!
	stp	d14, d15, [sp, #-0x10]!
	mov	$rks1,x3
	mov	$rks2,x4
	mov	$enc,w6
	ld1	{@tweak[0].4s}, [$ivp]
	mov	$rks,$rks2
___
	# Turn the value loaded from $ivp into the first tweak by running it
	# through the single-block cipher with the $rks2 key schedule.
	&load_sbox();
	&rev32(@tweak[0],@tweak[0]);
	&encrypt_1blk(@tweak[0]);
$code.=<<___;
	mov	$rks,$rks1
	and	$remain,$len,#0x0F
	// convert length into blocks
	lsr	$blocks,$len,4
	cmp	$blocks,#1
	b.lt	.return${std}

	cmp	$remain,0
	// If the encryption/decryption Length is N times of 16,
	// the all blocks are encrypted/decrypted in .xts_encrypt_blocks${std}
	b.eq	.xts_encrypt_blocks${std}

	// If the encryption/decryption length is not N times of 16,
	// the last two blocks are encrypted/decrypted in .last_2blks_tweak${std} or .only_2blks_tweak${std}
	// the other blocks are encrypted/decrypted in .xts_encrypt_blocks${std}
	subs	$blocks,$blocks,#1
	b.eq	.only_2blks_tweak${std}
.xts_encrypt_blocks${std}:
___
	# Move the first tweak into the GPR pair @twx[0..1] and derive the next
	# seven tweaks into @twx[2..15], each from its predecessor.
	&rbit(@tweak[0],@tweak[0],$std);
	&rev32_armeb(@tweak[0],@tweak[0]);
	&mov_vec_to_reg(@tweak[0],@twx[0],@twx[1]);
	&compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3]);
	&compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5]);
	&compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7]);
	&compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9]);
	&compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11]);
	&compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13]);
	&compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15]);
$code.=<<___;
.Lxts_8_blocks_process${std}:
	cmp	$blocks,#8
	b.lt	.Lxts_4_blocks_process${std}
___
	# Eight blocks per iteration: materialise the eight tweaks as vectors,
	# XOR them into the input, run the 8-block cipher, XOR them again.
	&mov_reg_to_vec(@twx[0],@twx[1],@vtmp[0]);
	&mov_reg_to_vec(@twx[2],@twx[3],@vtmp[1]);
	&mov_reg_to_vec(@twx[4],@twx[5],@vtmp[2]);
	&mov_reg_to_vec(@twx[6],@twx[7],@vtmp[3]);
	&mov_reg_to_vec(@twx[8],@twx[9],@vtmpx[0]);
	&mov_reg_to_vec(@twx[10],@twx[11],@vtmpx[1]);
	&mov_reg_to_vec(@twx[12],@twx[13],@vtmpx[2]);
	&mov_reg_to_vec(@twx[14],@twx[15],@vtmpx[3]);
$code.=<<___;
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
___
	&rbit(@vtmp[0],@vtmp[0],$std);
	&rbit(@vtmp[1],@vtmp[1],$std);
	&rbit(@vtmp[2],@vtmp[2],$std);
	&rbit(@vtmp[3],@vtmp[3],$std);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @vtmp[0].16b
	eor	@data[1].16b, @data[1].16b, @vtmp[1].16b
	eor	@data[2].16b, @data[2].16b, @vtmp[2].16b
	eor	@data[3].16b, @data[3].16b, @vtmp[3].16b
	ld1	{@datax[0].4s,@datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64
___
	&rbit(@vtmpx[0],@vtmpx[0],$std);
	&rbit(@vtmpx[1],@vtmpx[1],$std);
	&rbit(@vtmpx[2],@vtmpx[2],$std);
	&rbit(@vtmpx[3],@vtmpx[3],$std);
$code.=<<___;
	eor	@datax[0].16b, @datax[0].16b, @vtmpx[0].16b
	eor	@datax[1].16b, @datax[1].16b, @vtmpx[1].16b
	eor	@datax[2].16b, @datax[2].16b, @vtmpx[2].16b
	eor	@datax[3].16b, @datax[3].16b, @vtmpx[3].16b
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
	&rev32(@datax[0],@datax[0]);
	&rev32(@datax[1],@datax[1]);
	&rev32(@datax[2],@datax[2]);
	&rev32(@datax[3],@datax[3]);
	&transpose(@data,@vtmp);
	&transpose(@datax,@vtmp);
$code.=<<___;
	bl	_${prefix}_enc_8blks
___
	&transpose(@vtmp,@datax);
	&transpose(@data,@datax);

	# Rebuild the eight tweaks just used as vectors again (@vtmpx for the
	# first four results, @tweak for the last four).  Each GPR pair is
	# copied out before it is overwritten with a tweak for the next
	# iteration, so the order of these calls must not change.
	&mov_reg_to_vec(@twx[0],@twx[1],@vtmpx[0]);
	&compute_tweak(@twx[14],@twx[15],@twx[0],@twx[1]);
	&mov_reg_to_vec(@twx[2],@twx[3],@vtmpx[1]);
	&compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3]);
	&mov_reg_to_vec(@twx[4],@twx[5],@vtmpx[2]);
	&compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5]);
	&mov_reg_to_vec(@twx[6],@twx[7],@vtmpx[3]);
	&compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7]);
	&mov_reg_to_vec(@twx[8],@twx[9],@tweak[0]);
	&compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9]);
	&mov_reg_to_vec(@twx[10],@twx[11],@tweak[1]);
	&compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11]);
	&mov_reg_to_vec(@twx[12],@twx[13],@tweak[2]);
	&compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13]);
	&mov_reg_to_vec(@twx[14],@twx[15],@tweak[3]);
	&compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15]);

	# Apply the same rbit() step that was applied before the cipher call,
	# so the XOR after encryption uses exactly the value that was XORed in
	# before it.  The 4-block path below gets this for free because it
	# reuses its already rbit-ed registers.  It also makes the tweak saved
	# at $ivp agree with what the 1/2/3/4-block paths save.
	&rbit(@vtmpx[0],@vtmpx[0],$std);
	&rbit(@vtmpx[1],@vtmpx[1],$std);
	&rbit(@vtmpx[2],@vtmpx[2],$std);
	&rbit(@vtmpx[3],@vtmpx[3],$std);
	&rbit(@tweak[0],@tweak[0],$std);
	&rbit(@tweak[1],@tweak[1],$std);
	&rbit(@tweak[2],@tweak[2],$std);
	&rbit(@tweak[3],@tweak[3],$std);
$code.=<<___;
	eor	@vtmp[0].16b, @vtmp[0].16b, @vtmpx[0].16b
	eor	@vtmp[1].16b, @vtmp[1].16b, @vtmpx[1].16b
	eor	@vtmp[2].16b, @vtmp[2].16b, @vtmpx[2].16b
	eor	@vtmp[3].16b, @vtmp[3].16b, @vtmpx[3].16b
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
	eor	@data[1].16b, @data[1].16b, @tweak[1].16b
	eor	@data[2].16b, @data[2].16b, @tweak[2].16b
	eor	@data[3].16b, @data[3].16b, @tweak[3].16b

	// save the last tweak
	st1	{@tweak[3].4s},[$ivp]
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	st1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64
	subs	$blocks,$blocks,#8
	b.gt	.Lxts_8_blocks_process${std}
	b	100f
.Lxts_4_blocks_process${std}:
___
	# Fewer than eight blocks left: load the first four pending tweaks.
	# They are needed whether or not a full group of four follows.
	&mov_reg_to_vec(@twx[0],@twx[1],@tweak[0]);
	&mov_reg_to_vec(@twx[2],@twx[3],@tweak[1]);
	&mov_reg_to_vec(@twx[4],@twx[5],@tweak[2]);
	&mov_reg_to_vec(@twx[6],@twx[7],@tweak[3]);
$code.=<<___;
	cmp	$blocks,#4
	b.lt	1f
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64
___
	&rbit(@tweak[0],@tweak[0],$std);
	&rbit(@tweak[1],@tweak[1],$std);
	&rbit(@tweak[2],@tweak[2],$std);
	&rbit(@tweak[3],@tweak[3],$std);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
	eor	@data[1].16b, @data[1].16b, @tweak[1].16b
	eor	@data[2].16b, @data[2].16b, @tweak[2].16b
	eor	@data[3].16b, @data[3].16b, @tweak[3].16b
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&rev32(@data[3],@data[3]);
	&transpose(@data,@vtmp);
$code.=<<___;
	bl	_${prefix}_enc_4blks
___
	&transpose(@vtmp,@data);
$code.=<<___;
	eor	@vtmp[0].16b, @vtmp[0].16b, @tweak[0].16b
	eor	@vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b
	eor	@vtmp[2].16b, @vtmp[2].16b, @tweak[2].16b
	eor	@vtmp[3].16b, @vtmp[3].16b, @tweak[3].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64
	sub	$blocks,$blocks,#4
___
	# Tweaks 5..7 become the pending tweaks for the up-to-three blocks that
	# may remain.  @tweak[3] is left alone because it is stored next.
	&mov_reg_to_vec(@twx[8],@twx[9],@tweak[0]);
	&mov_reg_to_vec(@twx[10],@twx[11],@tweak[1]);
	&mov_reg_to_vec(@twx[12],@twx[13],@tweak[2]);
$code.=<<___;
	// save the last tweak
	st1	{@tweak[3].4s},[$ivp]
1:
	// process last block
	cmp	$blocks,#1
	b.lt	100f
	b.gt	1f
	ld1	{@data[0].4s},[$inp],#16
___
	&rbit(@tweak[0],@tweak[0],$std);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
___
	&rev32(@data[0],@data[0]);
	&encrypt_1blk(@data[0]);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
	st1	{@data[0].4s},[$outp],#16
	// save the last tweak
	st1	{@tweak[0].4s},[$ivp]
	b	100f
1:	// process last 2 blocks
	cmp	$blocks,#2
	b.gt	1f
	ld1	{@data[0].4s,@data[1].4s},[$inp],#32
___
	&rbit(@tweak[0],@tweak[0],$std);
	&rbit(@tweak[1],@tweak[1],$std);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
	eor	@data[1].16b, @data[1].16b, @tweak[1].16b
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&transpose(@data,@vtmp);
$code.=<<___;
	bl	_${prefix}_enc_4blks
___
	&transpose(@vtmp,@data);
$code.=<<___;
	eor	@vtmp[0].16b, @vtmp[0].16b, @tweak[0].16b
	eor	@vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s},[$outp],#32
	// save the last tweak
	st1	{@tweak[1].4s},[$ivp]
	b	100f
1:	// process last 3 blocks
	ld1	{@data[0].4s,@data[1].4s,@data[2].4s},[$inp],#48
___
	&rbit(@tweak[0],@tweak[0],$std);
	&rbit(@tweak[1],@tweak[1],$std);
	&rbit(@tweak[2],@tweak[2],$std);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[0].16b
	eor	@data[1].16b, @data[1].16b, @tweak[1].16b
	eor	@data[2].16b, @data[2].16b, @tweak[2].16b
___
	&rev32(@data[0],@data[0]);
	&rev32(@data[1],@data[1]);
	&rev32(@data[2],@data[2]);
	&transpose(@data,@vtmp);
$code.=<<___;
	bl	_${prefix}_enc_4blks
___
	&transpose(@vtmp,@data);
$code.=<<___;
	eor	@vtmp[0].16b, @vtmp[0].16b, @tweak[0].16b
	eor	@vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b
	eor	@vtmp[2].16b, @vtmp[2].16b, @tweak[2].16b
	st1	{@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s},[$outp],#48
	// save the last tweak
	st1	{@tweak[2].4s},[$ivp]
100:
	cmp	$remain,0
	b.eq	.return${std}

// This branch calculates the last two tweaks,
// while the encryption/decryption length is larger than 32
.last_2blks_tweak${std}:
	ld1	{@tweak[0].4s},[$ivp]
___
	# Continue from the last tweak that was stored at $ivp.
	&rev32_armeb(@tweak[0],@tweak[0]);
	&compute_tweak_vec(@tweak[0],@tweak[1],$std);
	&compute_tweak_vec(@tweak[1],@tweak[2],$std);
$code.=<<___;
	b	.check_dec${std}


// This branch calculates the last two tweaks,
// while the encryption/decryption length is equal to 32, who only need two tweaks
.only_2blks_tweak${std}:
	mov	@tweak[1].16b,@tweak[0].16b
___
	# Reached straight from the entry code, so @tweak[0] still holds the
	# first tweak and only one further tweak has to be derived.
	&rev32_armeb(@tweak[1],@tweak[1]);
	&compute_tweak_vec(@tweak[1],@tweak[2],$std);
$code.=<<___;
	b	.check_dec${std}


// Determine whether encryption or decryption is required.
// The last two tweaks need to be swapped for decryption.
.check_dec${std}:
	// encryption:1 decryption:0
	cmp	$enc,1
	b.eq	.process_last_2blks${std}
	mov	@vtmp[0].16B,@tweak[1].16b
	mov	@tweak[1].16B,@tweak[2].16b
	mov	@tweak[2].16B,@vtmp[0].16b

.process_last_2blks${std}:
___
	&rev32_armeb(@tweak[1],@tweak[1]);
	&rev32_armeb(@tweak[2],@tweak[2]);
$code.=<<___;
	ld1	{@data[0].4s},[$inp],#16
	eor	@data[0].16b, @data[0].16b, @tweak[1].16b
___
	&rev32(@data[0],@data[0]);
	&encrypt_1blk(@data[0]);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[1].16b
	st1	{@data[0].4s},[$outp],#16

	// Byte-wise swap for the partial block: each of the $remain trailing
	// input bytes replaces a byte of the block just written, and the byte
	// it displaces becomes the corresponding trailing output byte.
	sub	$lastBlk,$outp,16
	.loop${std}:
		subs	$remain,$remain,1
		ldrb	$wtmp0,[$lastBlk,$remain]
		ldrb	$wtmp1,[$inp,$remain]
		strb	$wtmp1,[$lastBlk,$remain]
		strb	$wtmp0,[$outp,$remain]
	b.gt	.loop${std}
	ld1	{@data[0].4s}, [$lastBlk]
	eor	@data[0].16b, @data[0].16b, @tweak[2].16b
___
	&rev32(@data[0],@data[0]);
	&encrypt_1blk(@data[0]);
$code.=<<___;
	eor	@data[0].16b, @data[0].16b, @tweak[2].16b
	st1	{@data[0].4s}, [$lastBlk]
.return${std}:
	ldp	d14, d15, [sp], #0x10
	ldp	d12, d13, [sp], #0x10
	ldp	d10, d11, [sp], #0x10
	ldp	d8, d9, [sp], #0x10
	ldp	x29, x30, [sp], #0x10
	ldp	x27, x28, [sp], #0x10
	ldp	x25, x26, [sp], #0x10
	ldp	x23, x24, [sp], #0x10
	ldp	x21, x22, [sp], #0x10
	ldp	x19, x20, [sp], #0x10
	ldp	x17, x18, [sp], #0x10
	ldp	x15, x16, [sp], #0x10
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	${prefix}_xts_encrypt${std},.-${prefix}_xts_encrypt${std}
___
} # end of gen_xts_cipher
&gen_xts_cipher("_gb");
&gen_xts_cipher("");
}}}
########################################
# Copy this script's leading comment block to the output, rewriting the
# leading '#' of each line to '//' and skipping the '#!' line.  Copying stops
# at the first line that is neither a comment nor empty.  The three-argument
# open keeps characters in the script path from being read as an open mode;
# a failed open still just skips the header, as before.
if (open SELF,"<",$0) {
	while(<SELF>) {
		next if (/^#!/);
		last if (!s/^#/\/\// and !/^$/);
		print;
	}
	close SELF;
}

# Emit the generated code line by line, replacing every `...` span with the
# result of evaluating its contents as Perl.
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/ge;
	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";