#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. Though comparison is
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.

use strict;
use warnings;

# The last command-line argument, if present and true, names the output
# file; otherwise the generated assembly goes to the inherited STDOUT.
# Three-argument open keeps characters in the file name from being
# interpreted as an open mode, and the failure is reported right away.
my $output = pop;
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Accumulates the generated assembly text; printed once at the very end.
my $code = '';

my ($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8");	# argument vector

# Registers receiving the XORMPY results: the low ($Alo) and high ($Ahi)
# halfword of the first operand multiplied by each of the four bytes
# ($B_0..$B_3) of the second operand.
my ($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
my ($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
my ($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
# Registers that hold the operands of the second and third multiplication.
# They alias $Alo and $B_1, which the emitted code overwrites while it
# splits the operands.
my ($A,$B)=($Alo,$B_1);
my $xFF="B1";			# loaded with 0xFF at function entry (byte mask)

# mul_1x1_upper($A,$B)
#
# Appends the first half of a 1x1 multiplication: splits register $B into
# four bytes and register $A into two halfwords, then issues the eight
# XORMPY multiplications.  Combining the partial products is left to the
# mul_1x1_merged or mul_1x1_lower call that follows.
sub mul_1x1_upper {
    my ($A,$B)=@_;
    $code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16, $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
||	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
	XORMPY	$Alo,$B_0,$Alox0
||	XORMPY	$Ahi,$B_0,$Ahix0
	XORMPY	$Alo,$B_3,$Alox3
||	XORMPY	$Ahi,$B_3,$Ahix3
	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
___
    return;
}

# mul_1x1_merged($OUTlo,$OUThi,$A,$B)
#
# Appends code that interleaves two things: the XOR/shift sequence that
# folds the partial products currently held in $Alox*/$Ahix* into the
# register pair $OUTlo/$OUThi, and the operand split plus XORMPY issue for
# the next multiplication of $A by $B.  The $Alo x $B_0 product is written
# to A1 and only moved into $Alox0 in the last packet; presumably this
# keeps the previous $Alox0 value readable until it has been XORed into
# $OUTlo (NOTE(review): confirm against the C64x+ pipeline timing).
# The instruction order is schedule-sensitive; do not reorder.
sub mul_1x1_merged {
    my ($OUTlo,$OUThi,$A,$B)=@_;
    $code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16, $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
||	XORMPY	$Alo,$B_2,$Alox2
	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
||	XORMPY	$Alo,$B_0,A1		; $Alox0
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	XORMPY	$Ahi,$B_0,$Ahix0
||	XORMPY	$Alo,$B_3,$Alox3
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	XORMPY	$Ahi,$B_3,$Ahix3
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
||	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
||	MV	A1,$Alox0
___
    return;
}

# mul_1x1_lower($OUTlo,$OUThi)
#
# Appends the second half of a 1x1 multiplication only: the XOR/shift
# sequence that folds the partial products held in $Alox*/$Ahix* into the
# register pair $OUTlo/$OUThi.  No new multiplication is started.
sub mul_1x1_lower {
    my ($OUTlo,$OUThi)=@_;
    $code.=<<___;
	;NOP
	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
	NOP
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
___
    return;
}

# Function body.  Three 1x1 multiplications are chained -- a0*b0, a1*b1
# and (a0+a1)*(b0+b1) -- and their results are combined and stored as four
# words at *$rp.  Each heredoc fragment that starts with "||" joins the
# execute packet emitted last by the preceding sub call, so the fragments
# must stay in exactly this order.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
	.endif

	.global	_bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
	.asmfunc
	MVK	0xFF,$xFF
___
mul_1x1_upper($a0,$b0);				# a0·b0
$code.=<<___;
||	MV	$b1,$B
	MV	$a1,$A
___
mul_1x1_merged("A28","B28",$A,$B);		# a0·b0/a1·b1
$code.=<<___;
||	XOR	$b0,$b1,$B
	XOR	$a0,$a1,$A
___
mul_1x1_merged("A31","B31",$A,$B);		# a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
	XOR	A28,A31,A29
||	XOR	B28,B31,B29			; a0·b0+a1·b1
___
mul_1x1_lower("A30","B30");			# (a0+a1)·(b0+b1)
$code.=<<___;
||	BNOP	B3
	XOR	A29,A30,A30
||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
	XOR	B28,A30,A30
||	STW	A28,*${rp}[0]
	XOR	B30,A31,A31
||	STW	A30,*${rp}[1]
	STW	A31,*${rp}[2]
	STW	B31,*${rp}[3]
	.endasmfunc
___

print $code;
# Buffered write errors only surface at close, so this check is mandatory.
close STDOUT or die "error closing STDOUT: $!";