xref: /freebsd/crypto/openssl/crypto/bn/asm/c64xplus-gf2m.pl (revision b077aed33b7b6aefca7b17ddb250cf521f938613)
#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. Though comparison is
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.

26*b077aed3SPierre Pronchery$output = pop and open STDOUT,">$output";
27e71b7053SJung-uk Kim
28e71b7053SJung-uk Kim($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8");   # argument vector
29e71b7053SJung-uk Kim
30e71b7053SJung-uk Kim($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
31e71b7053SJung-uk Kim($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
32e71b7053SJung-uk Kim($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
33e71b7053SJung-uk Kim($A,$B)=($Alo,$B_1);
34e71b7053SJung-uk Kim$xFF="B1";
35e71b7053SJung-uk Kim
# mul_1x1_upper($A,$B)
#
# Appends to $code the first half of one word-by-word multiplication of
# the registers named by $A and $B:
#   * $B is split into four bytes ($B_0..$B_3) and $A into two halfwords
#     ($Alo,$Ahi);
#   * eight XORMPY instructions multiply each halfword by each byte and
#     leave the partial products in $Alox0..3 / $Ahix0..3.
# Nothing is combined here; mul_1x1_merged or mul_1x1_lower must follow
# to fold the partial products into a result.
#
# Note that $A and $B also interpolate into the emitted ";" comments, so
# the generated listing shows the actual register names.
sub mul_1x1_upper {
my ($A,$B)=@_;
$code.=<<___;
	EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	AND	$B,$xFF,$B_0
||	SHRU	$B,24,$B_3
	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	EXTU	$A,16,16,$Alo

	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
||	XORMPY	$Ahi,$B_2,$Ahix2
||	EXTU	$B,16,24,$B_1
	XORMPY	$Alo,$B_0,$Alox0
||	XORMPY	$Ahi,$B_0,$Ahix0
	XORMPY	$Alo,$B_3,$Alox3
||	XORMPY	$Ahi,$B_3,$Ahix3
	XORMPY	$Alo,$B_1,$Alox1
||	XORMPY	$Ahi,$B_1,$Ahix1
___
}
# mul_1x1_merged($OUTlo,$OUThi,$A,$B)
#
# Appends to $code an interleaved sequence that does two things at once:
#   * folds the partial products left in $Alox0..3 / $Ahix0..3 by the
#     PREVIOUS multiplication into the register pair $OUTlo/$OUThi
#     (shift-and-XOR accumulation);
#   * starts the NEXT multiplication of $A by $B, i.e. the same split and
#     XORMPY steps that mul_1x1_upper emits.
# In the heredoc the instructions of the next multiplication carry one
# extra leading space, which keeps the two interleaved streams apart.
#
# The new $Alo*$B_0 product is written to A1 first and moved into $Alox0
# by the final MV, because the old $Alox0 is still read by a later XOR.
#
# The order of instructions and the "||" packet grouping are part of the
# schedule; do not reorder lines.
sub mul_1x1_merged {
my ($OUTlo,$OUThi,$A,$B)=@_;
$code.=<<___;
	 EXTU	$B,8,24,$B_2		; smash $B to 4 bytes
||	 AND	$B,$xFF,$B_0
||	 SHRU	$B,24,$B_3
	 SHRU	$A,16,   $Ahi		; smash $A to two halfwords
||	 EXTU	$A,16,16,$Alo

	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
||	 XORMPY	$Alo,$B_2,$Alox2
	 XORMPY	$Ahi,$B_2,$Ahix2
||	 EXTU	$B,16,24,$B_1
||	 XORMPY	$Alo,$B_0,A1		; $Alox0
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	 XORMPY	$Ahi,$B_0,$Ahix0
||	 XORMPY	$Alo,$B_3,$Alox3
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	 XORMPY	$Ahi,$B_3,$Ahix3
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
||	 XORMPY	$Alo,$B_1,$Alox1
||	 XORMPY	$Ahi,$B_1,$Ahix1
||	 MV	A1,$Alox0
___
}
# mul_1x1_lower($OUTlo,$OUThi)
#
# Appends to $code the second half of a multiplication on its own: the
# partial products left in $Alox0..3 / $Ahix0..3 are folded into the
# register pair $OUTlo/$OUThi with the same XOR/SHL/SHRU sequence that
# mul_1x1_merged uses, but without starting another multiplication.
#
# The sequence opens with a commented-out ";NOP" and contains one real
# NOP in the slot where mul_1x1_merged issues XORMPYs.
# NOTE(review): the NOP presumably waits for outstanding XORMPY results;
# confirm against the C64x+ pipeline documentation before changing it.
sub mul_1x1_lower {
my ($OUTlo,$OUThi)=@_;
$code.=<<___;
	;NOP
	XOR	$Ahix0,$Alox2,$Ahix0
||	MV	$Ahix2,$OUThi
	NOP
	XOR	$Ahix1,$Alox3,$Ahix1
||	SHL	$Ahix0,16,$OUTlo
||	SHRU	$Ahix0,16,$Ahix0
	XOR	$Alox0,$OUTlo,$OUTlo
||	XOR	$Ahix0,$OUThi,$OUThi
||	SHL	$Alox1,8,$Alox1
||	SHL	$Ahix3,8,$Ahix3
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix3,$OUThi,$OUThi
||	SHL	$Ahix1,24,$Alox1
||	SHRU	$Ahix1,8, $Ahix1
	XOR	$Alox1,$OUTlo,$OUTlo
||	XOR	$Ahix1,$OUThi,$OUThi
___
}
# ---------------------------------------------------------------------
# Body of bn_GF2m_mul_2x2.  The generated function performs three
# word multiplications -- a0*b0, a1*b1 and (a0+a1)*(b0+b1) -- and
# combines them into a four-word result stored at $rp[0..3].
#
# Each heredoc that follows a mul_1x1_* call begins with a "||" line,
# so its first instruction joins the last execute packet emitted by
# that call.  The sequence is order-sensitive; do not insert anything
# into the emitted text between the pieces.
# ---------------------------------------------------------------------

# Section, symbol-name selection (the leading-underscore name is aliased
# to the plain one when __TI_EABI__ is set) and function entry; also
# loads the 0xFF byte mask.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.asg	bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
	.endif

	.global	_bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
	.asmfunc
	MVK	0xFF,$xFF
___
	&mul_1x1_upper($a0,$b0);		# a0·b0
# Stage a1/b1 in $A/$B for the second multiplication.
$code.=<<___;
||	MV	$b1,$B
	MV	$a1,$A
___
	&mul_1x1_merged("A28","B28",$A,$B);	# a0·b0/a1·b1
# Stage a0+a1 / b0+b1 (XOR) in $A/$B for the third multiplication.
$code.=<<___;
||	XOR	$b0,$b1,$B
	XOR	$a0,$a1,$A
___
	&mul_1x1_merged("A31","B31",$A,$B);	# a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
	XOR	A28,A31,A29
||	XOR	B28,B31,B29			; a0·b0+a1·b1
___
	&mul_1x1_lower("A30","B30");		# (a0+a1)·(b0+b1)
# Final combination and stores.
# NOTE(review): BNOP B3 is presumably the return branch, with the
# instructions after it executing in its delay slots -- confirm against
# the C6000 calling convention and pipeline documentation.
$code.=<<___;
||	BNOP	B3
	XOR	A29,A30,A30
||	XOR	B29,B30,B30			; (a0+a1)·(b0+b1)-a0·b0-a1·b1
	XOR	B28,A30,A30
||	STW	A28,*${rp}[0]
	XOR	B30,A31,A31
||	STW	A30,*${rp}[1]
	STW	A31,*${rp}[2]
	STW	B31,*${rp}[3]
	.endasmfunc
___

# Write out the accumulated assembly and make sure it was flushed.
print $code;
close STDOUT or die "error closing STDOUT: $!";