#! /usr/bin/env perl
# Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# This module implements support for Armv8 SM3 instructions

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# NOTE: the leading comment lines above (up to the first statement) are
# copied into the generated output by the banner loop near the end of this
# script, so nothing may be inserted above the first statement without
# changing the generated file.

# Locate arm-xlate.pl, either next to this script or in ../../perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through arm-xlate.pl.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

$prefix="sm3";

# Message expanding:
#	Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
# Input: s0, s1, s2, s3
#	s0 = w0  | w1  | w2  | w3
#	s1 = w4  | w5  | w6  | w7
#	s2 = w8  | w9  | w10 | w11
#	s3 = w12 | w13 | w14 | w15
# Output: s4
# vtmp1 and vtmp2 are scratch registers overwritten by the emitted code.
# The generated instructions are appended to $code.
sub msg_exp {
    my ($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) = @_;

$code.=<<___;
	// s4 = w7 | w8 | w9 | w10
	ext $s4.16b, $s1.16b, $s2.16b, #12
	// vtmp1 = w3 | w4 | w5 | w6
	ext $vtmp1.16b, $s0.16b, $s1.16b, #12
	// vtmp2 = w10 | w11 | w12 | w13
	ext $vtmp2.16b, $s2.16b, $s3.16b, #8
	sm3partw1 $s4.4s, $s0.4s, $s3.4s
	sm3partw2 $s4.4s, $vtmp2.4s, $vtmp1.4s
___
}

# A round of the compression function
# Input:
#	ab - selects the instruction variant: sm3tt1a/sm3tt2a or
#	     sm3tt1b/sm3tt2b
#	vstate0 - vstate1, store digest status (A - H)
#	vconst0 - vconst1, used alternately to store Tj <<< j; the emitted
#	     shl/sri pair writes vconst0 rotated left by one bit to vconst1
#	vtmp - temporary register, receives the sm3ss1 result
#	vw - for sm3tt1ab, vw = s0 eor s1
#	s0 - for sm3tt2ab, just s0
#	i - lane index (0..3), chooses wj' or wj from vw / s0
# The generated instructions are appended to $code.
sub round {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp, $vw, $s0, $i)
        = @_;

$code.=<<___;
	sm3ss1 $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
	shl $vconst1.4s, $vconst0.4s, #1
	sri $vconst1.4s, $vconst0.4s, #31
	sm3tt1$ab $vstate0.4s, $vtmp.4s, $vw.4s[$i]
	sm3tt2$ab $vstate1.4s, $vtmp.4s, $s0.4s[$i]
___
}

# Four consecutive rounds consuming the four words held in s0.
# Input:
#	ab, vstate0, vstate1, vconst0, vconst1 - as for round()
#	vtmp1, vtmp2 - scratch registers
#	s0 - s3 - message words, see msg_exp()
#	s4 - destination of the message expansion; when it is omitted no
#	     expansion is emitted (s2 and s3 are then unused as well)
sub qround {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp1, $vtmp2,
        $s0, $s1, $s2, $s3, $s4) = @_;

    if ($s4) {
        msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2);
    }

$code.=<<___;
	eor $vtmp1.16b, $s0.16b, $s1.16b
___

    # The two constant registers swap roles every round: each round reads
    # one of them and leaves the next constant in the other.
    for my $lane (0 .. 3) {
        my ($const_in, $const_out) = $lane % 2
                                   ? ($vconst1, $vconst0)
                                   : ($vconst0, $vconst1);
        round($ab, $vstate0, $vstate1, $const_in, $const_out, $vtmp2,
              $vtmp1, $s0, $lane);
    }
}

$code=<<___;
#include "arm_arch.h"
.text
___

{{{
# NOTE(review): the prototype below documents num as size_t, but the block
# counter is decremented and tested through w2, the 32-bit register name;
# confirm callers never pass a count that does not fit in 32 bits.
my ($pstate,$pdata,$num)=("x0","x1","w2");
my ($state1,$state2)=("v5","v6");
# s16/s17 and v16/v17 name the same two registers; the constants are loaded
# through the scalar names and used through the vector names.
my ($sconst1, $sconst2)=("s16","s17");
my ($vconst1, $vconst2)=("v16","v17");
my ($s0,$s1,$s2,$s3,$s4)=map("v$_",(0..4));
my ($bkstate1,$bkstate2)=("v18","v19");
my ($vconst_tmp1,$vconst_tmp2)=("v20","v21");
my ($vtmp1,$vtmp2)=("v22","v23");
my $constaddr="x8";
# void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
$code.=<<___;
.globl ossl_hwsm3_block_data_order
.type ossl_hwsm3_block_data_order,%function
.align 5
ossl_hwsm3_block_data_order:
	AARCH64_VALID_CALL_TARGET
	// load state
	ld1 {$state1.4s-$state2.4s}, [$pstate]
	rev64 $state1.4s, $state1.4s
	rev64 $state2.4s, $state2.4s
	ext $state1.16b, $state1.16b, $state1.16b, #8
	ext $state2.16b, $state2.16b, $state2.16b, #8
___
# For linux64 the constants are emitted into .rodata (see below), so their
# address is formed with adrp/add; otherwise they follow the code in the
# current section and a plain adr is used.
if ($flavour =~ /linux64/)
{
$code.=<<___;
	adrp $constaddr, .Tj
	add $constaddr, $constaddr, #:lo12:.Tj
___
} else {
$code.=<<___;
	adr $constaddr, .Tj
___
}
$code.=<<___;
	ldp $sconst1, $sconst2, [$constaddr]

.Loop:
	// load input
	ld1 {$s0.4s-$s3.4s}, [$pdata], #64
	sub $num, $num, #1

	mov $bkstate1.16b, $state1.16b
	mov $bkstate2.16b, $state2.16b

#ifndef __AARCH64EB__
	rev32 $s0.16b, $s0.16b
	rev32 $s1.16b, $s1.16b
	rev32 $s2.16b, $s2.16b
	rev32 $s3.16b, $s3.16b
#endif

	ext $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
___
    # 4 quad-rounds with the "a" instruction variants, seeded from the first
    # constant.  The five message registers are used as a rotating window:
    # each call consumes the first register and expands into the last one.
    qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s0,$s1,$s2,$s3,$s4);
    qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s1,$s2,$s3,$s4,$s0);
    qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s2,$s3,$s4,$s0,$s1);
    qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s3,$s4,$s0,$s1,$s2);

$code.=<<___;
	ext $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
___

    # 12 quad-rounds with the "b" instruction variants, seeded from the
    # second constant.  The last three calls pass only two message
    # registers, so no further message expansion is emitted for them.
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s4,$s0,$s1,$s2,$s3);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s0,$s1,$s2,$s3,$s4);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s1,$s2,$s3,$s4,$s0);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s2,$s3,$s4,$s0,$s1);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s3,$s4,$s0,$s1,$s2);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s4,$s0,$s1,$s2,$s3);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s0,$s1,$s2,$s3,$s4);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s1,$s2,$s3,$s4,$s0);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s2,$s3,$s4,$s0,$s1);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s3,$s4);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s4,$s0);
    qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
           $s0,$s1);

$code.=<<___;
	eor $state1.16b, $state1.16b, $bkstate1.16b
	eor $state2.16b, $state2.16b, $bkstate2.16b

	// any remained blocks?
	cbnz $num, .Loop

	// save state
	rev64 $state1.4s, $state1.4s
	rev64 $state2.4s, $state2.4s
	ext $state1.16b, $state1.16b, $state1.16b, #8
	ext $state2.16b, $state2.16b, $state2.16b, #8
	st1 {$state1.4s-$state2.4s}, [$pstate]
	ret
.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
___

$code.=".rodata\n" if ($flavour =~ /linux64/);

$code.=<<___;

.type _${prefix}_consts,%object
.align 3
_${prefix}_consts:
.Tj:
.word 0x79cc4519, 0x9d8a7a87
.size _${prefix}_consts,.-_${prefix}_consts
___

$code.=".previous\n" if ($flavour =~ /linux64/);

}}}

#########################################
# The SM3 instructions are emitted as raw .inst words, with the original
# mnemonic kept as a trailing comment.  The tables below hold the base
# opcode of each mnemonic; the un*() helpers OR the register numbers (and
# lane index) into it.
my %sm3partopcode = (
    "sm3partw1" => 0xce60C000,
    "sm3partw2" => 0xce60C400);

my %sm3ss1opcode = (
    "sm3ss1" => 0xce400000);

my %sm3ttopcode = (
    "sm3tt1a" => 0xce408000,
    "sm3tt1b" => 0xce408400,
    "sm3tt2a" => 0xce408800,
    "sm3tt2b" => 0xce408C00);

# Encode "sm3partw1/2 Vd, Vn, Vm".
# Returns the ".inst" line; dies if the operands cannot be parsed, because
# returning a false value here would make the caller's s///e silently drop
# the instruction from the generated code.
sub unsm3part {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "$mnemonic: can't parse operands '$arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3partopcode{$mnemonic}|$1|($2<<5)|($3<<16),
                   $mnemonic,$arg;
}

# Encode "sm3ss1 Vd, Vn, Vm, Va"; the fourth register goes to bit 10.
# Returns the ".inst" line; dies if the operands cannot be parsed.
sub unsm3ss1 {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "$mnemonic: can't parse operands '$arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ss1opcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<10),
                   $mnemonic,$arg;
}

# Encode "sm3tt{1,2}{a,b} Vd, Vn, Vm.4s[i]"; the lane index i (0..3) goes
# to bit 12.
# Returns the ".inst" line; dies if the operands cannot be parsed.
sub unsm3tt {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/
        or die "$mnemonic: can't parse operands '$arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ttopcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<12),
                   $mnemonic,$arg;
}

# Copy this script's leading comment block to the output, turning '#' into
# '//' and skipping the #! line; stop at the first line that is neither a
# comment nor empty.  If the script cannot be read the banner is simply
# omitted.
if (open my $self, '<', $0) {
    while (<$self>) {
        next if (/^#!/);
        last if (!s/^#/\/\// and !/^$/);
        print;
    }
    close $self;
}

# Post-process the generated code line by line: evaluate `...` expressions
# and replace the SM3 mnemonics by their .inst encodings.
foreach my $line (split("\n",$code)) {
    $line =~ s/\`([^\`]*)\`/eval($1)/ge;

    $line =~ s/\b(sm3partw[1-2])\s+([qv].*)/unsm3part($1,$2)/ge;
    $line =~ s/\b(sm3ss1)\s+([qv].*)/unsm3ss1($1,$2)/ge;
    $line =~ s/\b(sm3tt[1-2][a-b])\s+([qv].*)/unsm3tt($1,$2)/ge;
    print $line,"\n";
}

close STDOUT or die "error closing STDOUT: $!";