/* Do not modify. This file is auto-generated from sm3-armv8.pl. */
// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// This module implements support for Armv8 SM3 instructions

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"
.text

//----------------------------------------------------------------------
// ossl_hwsm3_block_data_order
//
// SM3 compression over one or more 64-byte blocks using the Armv8 SM3
// instructions (emitted as raw .inst words, with the intended
// disassembly given in the trailing comment of each word).
//
// In:    x0 = address of the eight 32-bit state words (32 bytes, read
//             on entry and written back on exit)
//        x1 = address of the input; advanced by 64 bytes per block
//        w2 = number of 64-byte blocks; decremented once per block
// Out:   state at [x0] updated in place; no value is returned
// Clobb: x1, w2, x8, v0-v6, v16-v23 (no stack use, no calls)
//
// The counter is tested only after a block has been processed, so the
// loop body always runs at least once: a count of 0 would wrap around
// rather than return immediately.
// NOTE(review): only the low 32 bits of x2 are consulted. If the C
// prototype passes the count as a 64-bit size_t, counts of 2^32 or
// more are truncated -- confirm against the caller before widening.
//
// Register roles inside the loop:
//   v5,  v6   working state; the word order is reversed relative to
//             memory by the rev64+ext pairs at entry and exit
//   v18, v19  copy of the state taken at the top of each block
//   v0 - v4   rolling window of 20 message-schedule words
//   v16, v17  the two constants loaded from .Tj (word in lane 0)
//   v20, v21  per-round constant in lane 3, rotated left by one bit
//             after every round (ping-pong between the two registers)
//   v22, v23  temporaries
//----------------------------------------------------------------------
.globl	ossl_hwsm3_block_data_order
.type	ossl_hwsm3_block_data_order,%function
.align	5
ossl_hwsm3_block_data_order:
	// Macro supplied by arm_arch.h.
	// NOTE(review): presumably a BTI landing pad -- confirm in arm_arch.h.
	AARCH64_VALID_CALL_TARGET
	// load state
	ld1	{v5.4s,v6.4s}, [x0]
	// rev64 swaps the words inside each 64-bit half and ext #8 swaps the
	// halves: together they reverse the order of the four words
	rev64	v5.4s, v5.4s
	rev64	v6.4s, v6.4s
	ext	v5.16b, v5.16b, v5.16b, #8
	ext	v6.16b, v6.16b, v6.16b, #8
	// x8 = address of .Tj; s16 / s17 = its first / second word
	adrp	x8, .Tj
	add	x8, x8, #:lo12:.Tj
	ldp	s16, s17, [x8]

.Loop:
	// load input: v0 = W0..W3, v1 = W4..W7, v2 = W8..W11, v3 = W12..W15
	ld1	{v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64
	sub	w2, w2, #1

	// keep the incoming state for the final XOR of this block
	mov	v18.16b, v5.16b
	mov	v19.16b, v6.16b

	// byte-swap every 32-bit word unless built big-endian
#ifndef __AARCH64EB__
	rev32	v0.16b, v0.16b
	rev32	v1.16b, v1.16b
	rev32	v2.16b, v2.16b
	rev32	v3.16b, v3.16b
#endif

	// ---- rounds 0-3 (tt1a/tt2a form); schedule W16..W19 into v4 ----
	// rotate v16 by 4 bytes: the constant moves from lane 0 to lane 3
	ext	v20.16b, v16.16b, v16.16b, #4
	// v4 = W7 | W8 | W9 | W10
	ext	v4.16b, v1.16b, v2.16b, #12
	// v22 = W3 | W4 | W5 | W6
	ext	v22.16b, v0.16b, v1.16b, #12
	// v23 = W10 | W11 | W12 | W13
	ext	v23.16b, v2.16b, v3.16b, #8
.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	// v22 = W0..W3 ^ W4..W7
	eor	v22.16b, v0.16b, v1.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	// shl #1 + sri #31 = rotate each lane left by one bit
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
.inst	0xce408ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
.inst	0xce409ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
.inst	0xce40aae6	//sm3tt2a v6.4s, v23.4s, v0.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
.inst	0xce40bae6	//sm3tt2a v6.4s, v23.4s, v0.4s[3]

	// ---- rounds 4-7; schedule W20..W23 into v0 ----
	// v0 = W11 | W12 | W13 | W14
	ext	v0.16b, v2.16b, v3.16b, #12
	// v22 = W7 | W8 | W9 | W10
	ext	v22.16b, v1.16b, v2.16b, #12
	// v23 = W14 | W15 | W16 | W17
	ext	v23.16b, v3.16b, v4.16b, #8
.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	// v22 = W4..W7 ^ W8..W11
	eor	v22.16b, v1.16b, v2.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
.inst	0xce418ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
.inst	0xce419ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
.inst	0xce41aae6	//sm3tt2a v6.4s, v23.4s, v1.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
.inst	0xce41bae6	//sm3tt2a v6.4s, v23.4s, v1.4s[3]

	// ---- rounds 8-11; schedule W24..W27 into v1 ----
	// v1 = W15 | W16 | W17 | W18
	ext	v1.16b, v3.16b, v4.16b, #12
	// v22 = W11 | W12 | W13 | W14
	ext	v22.16b, v2.16b, v3.16b, #12
	// v23 = W18 | W19 | W20 | W21
	ext	v23.16b, v4.16b, v0.16b, #8
.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	// v22 = W8..W11 ^ W12..W15
	eor	v22.16b, v2.16b, v3.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
.inst	0xce428ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
.inst	0xce429ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
.inst	0xce42aae6	//sm3tt2a v6.4s, v23.4s, v2.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
.inst	0xce42bae6	//sm3tt2a v6.4s, v23.4s, v2.4s[3]

	// ---- rounds 12-15; schedule W28..W31 into v2 ----
	// v2 = W19 | W20 | W21 | W22
	ext	v2.16b, v4.16b, v0.16b, #12
	// v22 = W15 | W16 | W17 | W18
	ext	v22.16b, v3.16b, v4.16b, #12
	// v23 = W22 | W23 | W24 | W25
	ext	v23.16b, v0.16b, v1.16b, #8
.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
	// v22 = W12..W15 ^ W16..W19
	eor	v22.16b, v3.16b, v4.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
.inst	0xce438ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
.inst	0xce439ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
.inst	0xce43aae6	//sm3tt2a v6.4s, v23.4s, v3.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
.inst	0xce43bae6	//sm3tt2a v6.4s, v23.4s, v3.4s[3]

	// ---- rounds 16-19 (tt1b/tt2b form from here on);
	//      schedule W32..W35 into v3 ----
	// restart the round constant from the second .Tj word (lane 0 -> lane 3)
	ext	v20.16b, v17.16b, v17.16b, #4
	// v3 = W23 | W24 | W25 | W26
	ext	v3.16b, v0.16b, v1.16b, #12
	// v22 = W19 | W20 | W21 | W22
	ext	v22.16b, v4.16b, v0.16b, #12
	// v23 = W26 | W27 | W28 | W29
	ext	v23.16b, v1.16b, v2.16b, #8
.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
	// v22 = W16..W19 ^ W20..W23
	eor	v22.16b, v4.16b, v0.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// ---- rounds 20-23; schedule W36..W39 into v4 ----
	// v4 = W27 | W28 | W29 | W30
	ext	v4.16b, v1.16b, v2.16b, #12
	// v22 = W23 | W24 | W25 | W26
	ext	v22.16b, v0.16b, v1.16b, #12
	// v23 = W30 | W31 | W32 | W33
	ext	v23.16b, v2.16b, v3.16b, #8
.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	// v22 = W20..W23 ^ W24..W27
	eor	v22.16b, v0.16b, v1.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// ---- rounds 24-27; schedule W40..W43 into v0 ----
	// v0 = W31 | W32 | W33 | W34
	ext	v0.16b, v2.16b, v3.16b, #12
	// v22 = W27 | W28 | W29 | W30
	ext	v22.16b, v1.16b, v2.16b, #12
	// v23 = W34 | W35 | W36 | W37
	ext	v23.16b, v3.16b, v4.16b, #8
.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	// v22 = W24..W27 ^ W28..W31
	eor	v22.16b, v1.16b, v2.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]

	// ---- rounds 28-31; schedule W44..W47 into v1 ----
	// v1 = W35 | W36 | W37 | W38
	ext	v1.16b, v3.16b, v4.16b, #12
	// v22 = W31 | W32 | W33 | W34
	ext	v22.16b, v2.16b, v3.16b, #12
	// v23 = W38 | W39 | W40 | W41
	ext	v23.16b, v4.16b, v0.16b, #8
.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	// v22 = W28..W31 ^ W32..W35
	eor	v22.16b, v2.16b, v3.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]

	// ---- rounds 32-35; schedule W48..W51 into v2 ----
	// v2 = W39 | W40 | W41 | W42
	ext	v2.16b, v4.16b, v0.16b, #12
	// v22 = W35 | W36 | W37 | W38
	ext	v22.16b, v3.16b, v4.16b, #12
	// v23 = W42 | W43 | W44 | W45
	ext	v23.16b, v0.16b, v1.16b, #8
.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
	// v22 = W32..W35 ^ W36..W39
	eor	v22.16b, v3.16b, v4.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]

	// ---- rounds 36-39; schedule W52..W55 into v3 ----
	// v3 = W43 | W44 | W45 | W46
	ext	v3.16b, v0.16b, v1.16b, #12
	// v22 = W39 | W40 | W41 | W42
	ext	v22.16b, v4.16b, v0.16b, #12
	// v23 = W46 | W47 | W48 | W49
	ext	v23.16b, v1.16b, v2.16b, #8
.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
	// v22 = W36..W39 ^ W40..W43
	eor	v22.16b, v4.16b, v0.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// ---- rounds 40-43; schedule W56..W59 into v4 ----
	// v4 = W47 | W48 | W49 | W50
	ext	v4.16b, v1.16b, v2.16b, #12
	// v22 = W43 | W44 | W45 | W46
	ext	v22.16b, v0.16b, v1.16b, #12
	// v23 = W50 | W51 | W52 | W53
	ext	v23.16b, v2.16b, v3.16b, #8
.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	// v22 = W40..W43 ^ W44..W47
	eor	v22.16b, v0.16b, v1.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// ---- rounds 44-47; schedule W60..W63 into v0 ----
	// v0 = W51 | W52 | W53 | W54
	ext	v0.16b, v2.16b, v3.16b, #12
	// v22 = W47 | W48 | W49 | W50
	ext	v22.16b, v1.16b, v2.16b, #12
	// v23 = W54 | W55 | W56 | W57
	ext	v23.16b, v3.16b, v4.16b, #8
.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	// v22 = W44..W47 ^ W48..W51
	eor	v22.16b, v1.16b, v2.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]

	// ---- rounds 48-51; schedule W64..W67 into v1 (last expansion) ----
	// v1 = W55 | W56 | W57 | W58
	ext	v1.16b, v3.16b, v4.16b, #12
	// v22 = W51 | W52 | W53 | W54
	ext	v22.16b, v2.16b, v3.16b, #12
	// v23 = W58 | W59 | W60 | W61
	ext	v23.16b, v4.16b, v0.16b, #8
.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	// v22 = W48..W51 ^ W52..W55
	eor	v22.16b, v2.16b, v3.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]

	// ---- rounds 52-55; no further schedule words are needed ----
	// v22 = W52..W55 ^ W56..W59
	eor	v22.16b, v3.16b, v4.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]

	// ---- rounds 56-59 ----
	// v22 = W56..W59 ^ W60..W63
	eor	v22.16b, v4.16b, v0.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// ---- rounds 60-63 ----
	// v22 = W60..W63 ^ W64..W67
	eor	v22.16b, v0.16b, v1.16b
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// fold the state saved at the top of this block into the result
	eor	v5.16b, v5.16b, v18.16b
	eor	v6.16b, v6.16b, v19.16b

	// any remaining blocks?
	cbnz	w2, .Loop

	// save state (undo the word reversal applied at entry)
	rev64	v5.4s, v5.4s
	rev64	v6.4s, v6.4s
	ext	v5.16b, v5.16b, v5.16b, #8
	ext	v6.16b, v6.16b, v6.16b, #8
	st1	{v5.4s,v6.4s}, [x0]
	ret
.size	ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
.section	.rodata

// Round-constant seeds. The first word is used from round 0 and the
// second from round 16; each is rotated left by one bit per round in
// the loop above. 0x9d8a7a87 equals 0x7a879d8a rotated left by 16 bits.
.type	_sm3_consts,%object
.align	3
_sm3_consts:
.Tj:
.word	0x79cc4519, 0x9d8a7a87
.size	_sm3_consts,.-_sm3_consts
.previous