1*9f69f52bSEric Biggers// SPDX-License-Identifier: GPL-2.0-or-later 2*9f69f52bSEric Biggers/* 3*9f69f52bSEric Biggers * sm3-neon-core.S - SM3 secure hash using NEON instructions 4*9f69f52bSEric Biggers * 5*9f69f52bSEric Biggers * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64 6*9f69f52bSEric Biggers * 7*9f69f52bSEric Biggers * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi> 8*9f69f52bSEric Biggers * Copyright (c) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> 9*9f69f52bSEric Biggers */ 10*9f69f52bSEric Biggers 11*9f69f52bSEric Biggers#include <linux/linkage.h> 12*9f69f52bSEric Biggers#include <asm/assembler.h> 13*9f69f52bSEric Biggers 14*9f69f52bSEric Biggers/* Context structure */ 15*9f69f52bSEric Biggers 16*9f69f52bSEric Biggers#define state_h0 0 17*9f69f52bSEric Biggers#define state_h1 4 18*9f69f52bSEric Biggers#define state_h2 8 19*9f69f52bSEric Biggers#define state_h3 12 20*9f69f52bSEric Biggers#define state_h4 16 21*9f69f52bSEric Biggers#define state_h5 20 22*9f69f52bSEric Biggers#define state_h6 24 23*9f69f52bSEric Biggers#define state_h7 28 24*9f69f52bSEric Biggers 25*9f69f52bSEric Biggers/* Stack structure */ 26*9f69f52bSEric Biggers 27*9f69f52bSEric Biggers#define STACK_W_SIZE (32 * 2 * 3) 28*9f69f52bSEric Biggers 29*9f69f52bSEric Biggers#define STACK_W (0) 30*9f69f52bSEric Biggers#define STACK_SIZE (STACK_W + STACK_W_SIZE) 31*9f69f52bSEric Biggers 32*9f69f52bSEric Biggers/* Register macros */ 33*9f69f52bSEric Biggers 34*9f69f52bSEric Biggers#define RSTATE x0 35*9f69f52bSEric Biggers#define RDATA x1 36*9f69f52bSEric Biggers#define RNBLKS x2 37*9f69f52bSEric Biggers#define RKPTR x28 38*9f69f52bSEric Biggers#define RFRAME x29 39*9f69f52bSEric Biggers 40*9f69f52bSEric Biggers#define ra w3 41*9f69f52bSEric Biggers#define rb w4 42*9f69f52bSEric Biggers#define rc w5 43*9f69f52bSEric Biggers#define rd w6 44*9f69f52bSEric Biggers#define re w7 45*9f69f52bSEric Biggers#define rf w8 46*9f69f52bSEric Biggers#define rg w9 47*9f69f52bSEric Biggers#define rh w10 48*9f69f52bSEric Biggers 49*9f69f52bSEric Biggers#define t0 w11 50*9f69f52bSEric Biggers#define t1 w12 51*9f69f52bSEric Biggers#define t2 w13 52*9f69f52bSEric Biggers#define t3 w14 53*9f69f52bSEric Biggers#define t4 w15 54*9f69f52bSEric Biggers#define t5 w16 55*9f69f52bSEric Biggers#define t6 w17 56*9f69f52bSEric Biggers 57*9f69f52bSEric Biggers#define k_even w19 58*9f69f52bSEric Biggers#define k_odd w20 59*9f69f52bSEric Biggers 60*9f69f52bSEric Biggers#define addr0 x21 61*9f69f52bSEric Biggers#define addr1 x22 62*9f69f52bSEric Biggers 63*9f69f52bSEric Biggers#define s0 w23 64*9f69f52bSEric Biggers#define s1 w24 65*9f69f52bSEric Biggers#define s2 w25 66*9f69f52bSEric Biggers#define s3 w26 67*9f69f52bSEric Biggers 68*9f69f52bSEric Biggers#define W0 v0 69*9f69f52bSEric Biggers#define W1 v1 70*9f69f52bSEric Biggers#define W2 v2 71*9f69f52bSEric Biggers#define W3 v3 72*9f69f52bSEric Biggers#define W4 v4 73*9f69f52bSEric Biggers#define W5 v5 74*9f69f52bSEric Biggers 75*9f69f52bSEric Biggers#define XTMP0 v6 76*9f69f52bSEric Biggers#define XTMP1 v7 77*9f69f52bSEric Biggers#define XTMP2 v16 78*9f69f52bSEric Biggers#define XTMP3 v17 79*9f69f52bSEric Biggers#define XTMP4 v18 80*9f69f52bSEric Biggers#define XTMP5 v19 81*9f69f52bSEric Biggers#define XTMP6 v20 82*9f69f52bSEric Biggers 83*9f69f52bSEric Biggers/* Helper macros. */ 84*9f69f52bSEric Biggers 85*9f69f52bSEric Biggers#define _(...) /*_*/ 86*9f69f52bSEric Biggers 87*9f69f52bSEric Biggers#define clear_vec(x) \ 88*9f69f52bSEric Biggers movi x.8h, #0; 89*9f69f52bSEric Biggers 90*9f69f52bSEric Biggers#define rolw(o, a, n) \ 91*9f69f52bSEric Biggers ror o, a, #(32 - n); 92*9f69f52bSEric Biggers 93*9f69f52bSEric Biggers/* Round function macros. */ 94*9f69f52bSEric Biggers 95*9f69f52bSEric Biggers#define GG1_1(x, y, z, o, t) \ 96*9f69f52bSEric Biggers eor o, x, y; 97*9f69f52bSEric Biggers#define GG1_2(x, y, z, o, t) \ 98*9f69f52bSEric Biggers eor o, o, z; 99*9f69f52bSEric Biggers#define GG1_3(x, y, z, o, t) 100*9f69f52bSEric Biggers 101*9f69f52bSEric Biggers#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t) 102*9f69f52bSEric Biggers#define FF1_2(x, y, z, o, t) 103*9f69f52bSEric Biggers#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t) 104*9f69f52bSEric Biggers 105*9f69f52bSEric Biggers#define GG2_1(x, y, z, o, t) \ 106*9f69f52bSEric Biggers bic o, z, x; 107*9f69f52bSEric Biggers#define GG2_2(x, y, z, o, t) \ 108*9f69f52bSEric Biggers and t, y, x; 109*9f69f52bSEric Biggers#define GG2_3(x, y, z, o, t) \ 110*9f69f52bSEric Biggers eor o, o, t; 111*9f69f52bSEric Biggers 112*9f69f52bSEric Biggers#define FF2_1(x, y, z, o, t) \ 113*9f69f52bSEric Biggers eor o, x, y; 114*9f69f52bSEric Biggers#define FF2_2(x, y, z, o, t) \ 115*9f69f52bSEric Biggers and t, x, y; \ 116*9f69f52bSEric Biggers and o, o, z; 117*9f69f52bSEric Biggers#define FF2_3(x, y, z, o, t) \ 118*9f69f52bSEric Biggers eor o, o, t; 119*9f69f52bSEric Biggers 120*9f69f52bSEric Biggers#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ 121*9f69f52bSEric Biggers K_LOAD(round); \ 122*9f69f52bSEric Biggers ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \ 123*9f69f52bSEric Biggers rolw(t0, a, 12); /* rol(a, 12) => t0 */ \ 124*9f69f52bSEric Biggers IOP(1, iop_param); \ 125*9f69f52bSEric Biggers FF##i##_1(a, b, c, t1, t2); \ 126*9f69f52bSEric Biggers ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \ 127*9f69f52bSEric Biggers add k, k, e; \ 128*9f69f52bSEric Biggers IOP(2, iop_param); \ 129*9f69f52bSEric Biggers GG##i##_1(e, f, g, t3, t4); \ 130*9f69f52bSEric Biggers FF##i##_2(a, b, c, t1, t2); \ 131*9f69f52bSEric Biggers IOP(3, iop_param); \ 132*9f69f52bSEric Biggers add k, k, t0; \ 133*9f69f52bSEric Biggers add h, h, t5; \ 134*9f69f52bSEric Biggers add d, d, t6; /* w1w2 + d => d */ \ 135*9f69f52bSEric Biggers IOP(4, iop_param); \ 136*9f69f52bSEric Biggers rolw(k, k, 7); /* rol (t0 + e + t), 7) => k */ \ 137*9f69f52bSEric Biggers GG##i##_2(e, f, g, t3, t4); \ 138*9f69f52bSEric Biggers add h, h, k; /* h + w1 + k => h */ \ 139*9f69f52bSEric Biggers IOP(5, iop_param); \ 140*9f69f52bSEric Biggers FF##i##_3(a, b, c, t1, t2); \ 141*9f69f52bSEric Biggers eor t0, t0, k; /* k ^ t0 => t0 */ \ 142*9f69f52bSEric Biggers GG##i##_3(e, f, g, t3, t4); \ 143*9f69f52bSEric Biggers add d, d, t1; /* FF(a,b,c) + d => d */ \ 144*9f69f52bSEric Biggers IOP(6, iop_param); \ 145*9f69f52bSEric Biggers add t3, t3, h; /* GG(e,f,g) + h => t3 */ \ 146*9f69f52bSEric Biggers rolw(b, b, 9); /* rol(b, 9) => b */ \ 147*9f69f52bSEric Biggers eor h, t3, t3, ror #(32-9); \ 148*9f69f52bSEric Biggers IOP(7, iop_param); \ 149*9f69f52bSEric Biggers add d, d, t0; /* t0 + d => d */ \ 150*9f69f52bSEric Biggers rolw(f, f, 19); /* rol(f, 19) => f */ \ 151*9f69f52bSEric Biggers IOP(8, iop_param); \ 152*9f69f52bSEric Biggers eor h, h, t3, ror #(32-17); /* P0(t3) => h */ 153*9f69f52bSEric Biggers 154*9f69f52bSEric Biggers#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ 155*9f69f52bSEric Biggers R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) 156*9f69f52bSEric Biggers 157*9f69f52bSEric Biggers#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ 158*9f69f52bSEric Biggers R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) 159*9f69f52bSEric Biggers 160*9f69f52bSEric Biggers#define KL(round) \ 161*9f69f52bSEric Biggers ldp k_even, k_odd, [RKPTR, #(4*(round))]; 162*9f69f52bSEric Biggers 163*9f69f52bSEric Biggers/* Input expansion macros. */ 164*9f69f52bSEric Biggers 165*9f69f52bSEric Biggers/* Byte-swapped input address. */ 166*9f69f52bSEric Biggers#define IW_W_ADDR(round, widx, offs) \ 167*9f69f52bSEric Biggers (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4)) 168*9f69f52bSEric Biggers 169*9f69f52bSEric Biggers/* Expanded input address. */ 170*9f69f52bSEric Biggers#define XW_W_ADDR(round, widx, offs) \ 171*9f69f52bSEric Biggers (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4)) 172*9f69f52bSEric Biggers 173*9f69f52bSEric Biggers/* Rounds 1-12, byte-swapped input block addresses. */ 174*9f69f52bSEric Biggers#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32) 175*9f69f52bSEric Biggers#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48) 176*9f69f52bSEric Biggers 177*9f69f52bSEric Biggers/* Rounds 1-12, expanded input block addresses. */ 178*9f69f52bSEric Biggers#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) 179*9f69f52bSEric Biggers#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16) 180*9f69f52bSEric Biggers 181*9f69f52bSEric Biggers/* Input block loading. 182*9f69f52bSEric Biggers * Interleaving within round function needed for in-order CPUs. */ 183*9f69f52bSEric Biggers#define LOAD_W_VEC_1_1() \ 184*9f69f52bSEric Biggers add addr0, sp, #IW_W1_ADDR(0, 0); 185*9f69f52bSEric Biggers#define LOAD_W_VEC_1_2() \ 186*9f69f52bSEric Biggers add addr1, sp, #IW_W1_ADDR(4, 0); 187*9f69f52bSEric Biggers#define LOAD_W_VEC_1_3() \ 188*9f69f52bSEric Biggers ld1 {W0.16b}, [RDATA], #16; 189*9f69f52bSEric Biggers#define LOAD_W_VEC_1_4() \ 190*9f69f52bSEric Biggers ld1 {W1.16b}, [RDATA], #16; 191*9f69f52bSEric Biggers#define LOAD_W_VEC_1_5() \ 192*9f69f52bSEric Biggers ld1 {W2.16b}, [RDATA], #16; 193*9f69f52bSEric Biggers#define LOAD_W_VEC_1_6() \ 194*9f69f52bSEric Biggers ld1 {W3.16b}, [RDATA], #16; 195*9f69f52bSEric Biggers#define LOAD_W_VEC_1_7() \ 196*9f69f52bSEric Biggers rev32 XTMP0.16b, W0.16b; 197*9f69f52bSEric Biggers#define LOAD_W_VEC_1_8() \ 198*9f69f52bSEric Biggers rev32 XTMP1.16b, W1.16b; 199*9f69f52bSEric Biggers#define LOAD_W_VEC_2_1() \ 200*9f69f52bSEric Biggers rev32 XTMP2.16b, W2.16b; 201*9f69f52bSEric Biggers#define LOAD_W_VEC_2_2() \ 202*9f69f52bSEric Biggers rev32 XTMP3.16b, W3.16b; 203*9f69f52bSEric Biggers#define LOAD_W_VEC_2_3() \ 204*9f69f52bSEric Biggers eor XTMP4.16b, XTMP1.16b, XTMP0.16b; 205*9f69f52bSEric Biggers#define LOAD_W_VEC_2_4() \ 206*9f69f52bSEric Biggers eor XTMP5.16b, XTMP2.16b, XTMP1.16b; 207*9f69f52bSEric Biggers#define LOAD_W_VEC_2_5() \ 208*9f69f52bSEric Biggers st1 {XTMP0.16b}, [addr0], #16; 209*9f69f52bSEric Biggers#define LOAD_W_VEC_2_6() \ 210*9f69f52bSEric Biggers st1 {XTMP4.16b}, [addr0]; \ 211*9f69f52bSEric Biggers add addr0, sp, #IW_W1_ADDR(8, 0); 212*9f69f52bSEric Biggers#define LOAD_W_VEC_2_7() \ 213*9f69f52bSEric Biggers eor XTMP6.16b, XTMP3.16b, XTMP2.16b; 214*9f69f52bSEric Biggers#define LOAD_W_VEC_2_8() \ 215*9f69f52bSEric Biggers ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */ 216*9f69f52bSEric Biggers#define LOAD_W_VEC_3_1() \ 217*9f69f52bSEric Biggers mov W2.16b, XTMP1.16b; /* W2: xx, w6, w5, w4 */ 218*9f69f52bSEric Biggers#define LOAD_W_VEC_3_2() \ 219*9f69f52bSEric Biggers st1 {XTMP1.16b}, [addr1], #16; 220*9f69f52bSEric Biggers#define LOAD_W_VEC_3_3() \ 221*9f69f52bSEric Biggers st1 {XTMP5.16b}, [addr1]; \ 222*9f69f52bSEric Biggers ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */ 223*9f69f52bSEric Biggers#define LOAD_W_VEC_3_4() \ 224*9f69f52bSEric Biggers ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */ 225*9f69f52bSEric Biggers#define LOAD_W_VEC_3_5() \ 226*9f69f52bSEric Biggers ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */ 227*9f69f52bSEric Biggers#define LOAD_W_VEC_3_6() \ 228*9f69f52bSEric Biggers st1 {XTMP2.16b}, [addr0], #16; 229*9f69f52bSEric Biggers#define LOAD_W_VEC_3_7() \ 230*9f69f52bSEric Biggers st1 {XTMP6.16b}, [addr0]; 231*9f69f52bSEric Biggers#define LOAD_W_VEC_3_8() \ 232*9f69f52bSEric Biggers ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */ 233*9f69f52bSEric Biggers 234*9f69f52bSEric Biggers#define LOAD_W_VEC_1(iop_num, ...) \ 235*9f69f52bSEric Biggers LOAD_W_VEC_1_##iop_num() 236*9f69f52bSEric Biggers#define LOAD_W_VEC_2(iop_num, ...) \ 237*9f69f52bSEric Biggers LOAD_W_VEC_2_##iop_num() 238*9f69f52bSEric Biggers#define LOAD_W_VEC_3(iop_num, ...) \ 239*9f69f52bSEric Biggers LOAD_W_VEC_3_##iop_num() 240*9f69f52bSEric Biggers 241*9f69f52bSEric Biggers/* Message scheduling. Note: 3 words per vector register. 242*9f69f52bSEric Biggers * Interleaving within round function needed for in-order CPUs. */ 243*9f69f52bSEric Biggers#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \ 244*9f69f52bSEric Biggers /* Load (w[i - 16]) => XTMP0 */ \ 245*9f69f52bSEric Biggers /* Load (w[i - 13]) => XTMP5 */ \ 246*9f69f52bSEric Biggers ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */ 247*9f69f52bSEric Biggers#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \ 248*9f69f52bSEric Biggers ext XTMP5.16b, w1.16b, w1.16b, #12; 249*9f69f52bSEric Biggers#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \ 250*9f69f52bSEric Biggers ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */ 251*9f69f52bSEric Biggers#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \ 252*9f69f52bSEric Biggers ext XTMP5.16b, XTMP5.16b, w2.16b, #12; 253*9f69f52bSEric Biggers#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \ 254*9f69f52bSEric Biggers /* w[i - 9] == w3 */ \ 255*9f69f52bSEric Biggers /* W3 ^ XTMP0 => XTMP0 */ \ 256*9f69f52bSEric Biggers eor XTMP0.16b, XTMP0.16b, w3.16b; 257*9f69f52bSEric Biggers#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \ 258*9f69f52bSEric Biggers /* w[i - 3] == w5 */ \ 259*9f69f52bSEric Biggers /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ 260*9f69f52bSEric Biggers /* rol(XTMP5, 7) => XTMP1 */ \ 261*9f69f52bSEric Biggers add addr0, sp, #XW_W1_ADDR((round), 0); \ 262*9f69f52bSEric Biggers shl XTMP2.4s, w5.4s, #15; 263*9f69f52bSEric Biggers#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \ 264*9f69f52bSEric Biggers shl XTMP1.4s, XTMP5.4s, #7; 265*9f69f52bSEric Biggers#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \ 266*9f69f52bSEric Biggers sri XTMP2.4s, w5.4s, #(32-15); 267*9f69f52bSEric Biggers#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \ 268*9f69f52bSEric Biggers sri XTMP1.4s, XTMP5.4s, #(32-7); 269*9f69f52bSEric Biggers#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \ 270*9f69f52bSEric Biggers eor XTMP0.16b, XTMP0.16b, XTMP2.16b; 271*9f69f52bSEric Biggers#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \ 272*9f69f52bSEric Biggers /* w[i - 6] == W4 */ \ 273*9f69f52bSEric Biggers /* W4 ^ XTMP1 => XTMP1 */ \ 274*9f69f52bSEric Biggers eor XTMP1.16b, XTMP1.16b, w4.16b; 275*9f69f52bSEric Biggers#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \ 276*9f69f52bSEric Biggers /* P1(XTMP0) ^ XTMP1 => W0 */ \ 277*9f69f52bSEric Biggers shl XTMP3.4s, XTMP0.4s, #15; 278*9f69f52bSEric Biggers#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \ 279*9f69f52bSEric Biggers shl XTMP4.4s, XTMP0.4s, #23; 280*9f69f52bSEric Biggers#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \ 281*9f69f52bSEric Biggers eor w0.16b, XTMP1.16b, XTMP0.16b; 282*9f69f52bSEric Biggers#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \ 283*9f69f52bSEric Biggers sri XTMP3.4s, XTMP0.4s, #(32-15); 284*9f69f52bSEric Biggers#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \ 285*9f69f52bSEric Biggers sri XTMP4.4s, XTMP0.4s, #(32-23); 286*9f69f52bSEric Biggers#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \ 287*9f69f52bSEric Biggers eor w0.16b, w0.16b, XTMP3.16b; 288*9f69f52bSEric Biggers#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \ 289*9f69f52bSEric Biggers /* Load (w[i - 3]) => XTMP2 */ \ 290*9f69f52bSEric Biggers ext XTMP2.16b, w4.16b, w4.16b, #12; 291*9f69f52bSEric Biggers#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \ 292*9f69f52bSEric Biggers eor w0.16b, w0.16b, XTMP4.16b; 293*9f69f52bSEric Biggers#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \ 294*9f69f52bSEric Biggers ext XTMP2.16b, XTMP2.16b, w5.16b, #12; 295*9f69f52bSEric Biggers#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \ 296*9f69f52bSEric Biggers /* W1 ^ W2 => XTMP3 */ \ 297*9f69f52bSEric Biggers eor XTMP3.16b, XTMP2.16b, w0.16b; 298*9f69f52bSEric Biggers#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5) 299*9f69f52bSEric Biggers#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \ 300*9f69f52bSEric Biggers st1 {XTMP2.16b-XTMP3.16b}, [addr0]; 301*9f69f52bSEric Biggers#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5) 302*9f69f52bSEric Biggers 303*9f69f52bSEric Biggers#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \ 304*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5) 305*9f69f52bSEric Biggers#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \ 306*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5) 307*9f69f52bSEric Biggers#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \ 308*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5) 309*9f69f52bSEric Biggers 310*9f69f52bSEric Biggers#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \ 311*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0) 312*9f69f52bSEric Biggers#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \ 313*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0) 314*9f69f52bSEric Biggers#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \ 315*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0) 316*9f69f52bSEric Biggers 317*9f69f52bSEric Biggers#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \ 318*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1) 319*9f69f52bSEric Biggers#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \ 320*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1) 321*9f69f52bSEric Biggers#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \ 322*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1) 323*9f69f52bSEric Biggers 324*9f69f52bSEric Biggers#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \ 325*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2) 326*9f69f52bSEric Biggers#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \ 327*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2) 328*9f69f52bSEric Biggers#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \ 329*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2) 330*9f69f52bSEric Biggers 331*9f69f52bSEric Biggers#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \ 332*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3) 333*9f69f52bSEric Biggers#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \ 334*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3) 335*9f69f52bSEric Biggers#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \ 336*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3) 337*9f69f52bSEric Biggers 338*9f69f52bSEric Biggers#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \ 339*9f69f52bSEric Biggers SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4) 340*9f69f52bSEric Biggers#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \ 341*9f69f52bSEric Biggers SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4) 342*9f69f52bSEric Biggers#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \ 343*9f69f52bSEric Biggers SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4) 344*9f69f52bSEric Biggers 345*9f69f52bSEric Biggers 346*9f69f52bSEric Biggers /* 347*9f69f52bSEric Biggers * Transform nblocks*64 bytes (nblocks*16 32-bit words) at 'data'. 348*9f69f52bSEric Biggers * 349*9f69f52bSEric Biggers * void sm3_neon_transform(struct sm3_block_state *state, 350*9f69f52bSEric Biggers * const u8 *data, size_t nblocks) 351*9f69f52bSEric Biggers */ 352*9f69f52bSEric Biggers .text 353*9f69f52bSEric Biggers.align 3 354*9f69f52bSEric BiggersSYM_FUNC_START(sm3_neon_transform) 355*9f69f52bSEric Biggers ldp ra, rb, [RSTATE, #0] 356*9f69f52bSEric Biggers ldp rc, rd, [RSTATE, #8] 357*9f69f52bSEric Biggers ldp re, rf, [RSTATE, #16] 358*9f69f52bSEric Biggers ldp rg, rh, [RSTATE, #24] 359*9f69f52bSEric Biggers 360*9f69f52bSEric Biggers stp x28, x29, [sp, #-16]! 361*9f69f52bSEric Biggers stp x19, x20, [sp, #-16]! 362*9f69f52bSEric Biggers stp x21, x22, [sp, #-16]! 363*9f69f52bSEric Biggers stp x23, x24, [sp, #-16]! 364*9f69f52bSEric Biggers stp x25, x26, [sp, #-16]! 365*9f69f52bSEric Biggers mov RFRAME, sp 366*9f69f52bSEric Biggers 367*9f69f52bSEric Biggers sub addr0, sp, #STACK_SIZE 368*9f69f52bSEric Biggers adr_l RKPTR, .LKtable 369*9f69f52bSEric Biggers and sp, addr0, #(~63) 370*9f69f52bSEric Biggers 371*9f69f52bSEric Biggers /* Preload first block. */ 372*9f69f52bSEric Biggers LOAD_W_VEC_1(1, 0) 373*9f69f52bSEric Biggers LOAD_W_VEC_1(2, 0) 374*9f69f52bSEric Biggers LOAD_W_VEC_1(3, 0) 375*9f69f52bSEric Biggers LOAD_W_VEC_1(4, 0) 376*9f69f52bSEric Biggers LOAD_W_VEC_1(5, 0) 377*9f69f52bSEric Biggers LOAD_W_VEC_1(6, 0) 378*9f69f52bSEric Biggers LOAD_W_VEC_1(7, 0) 379*9f69f52bSEric Biggers LOAD_W_VEC_1(8, 0) 380*9f69f52bSEric Biggers LOAD_W_VEC_2(1, 0) 381*9f69f52bSEric Biggers LOAD_W_VEC_2(2, 0) 382*9f69f52bSEric Biggers LOAD_W_VEC_2(3, 0) 383*9f69f52bSEric Biggers LOAD_W_VEC_2(4, 0) 384*9f69f52bSEric Biggers LOAD_W_VEC_2(5, 0) 385*9f69f52bSEric Biggers LOAD_W_VEC_2(6, 0) 386*9f69f52bSEric Biggers LOAD_W_VEC_2(7, 0) 387*9f69f52bSEric Biggers LOAD_W_VEC_2(8, 0) 388*9f69f52bSEric Biggers LOAD_W_VEC_3(1, 0) 389*9f69f52bSEric Biggers LOAD_W_VEC_3(2, 0) 390*9f69f52bSEric Biggers LOAD_W_VEC_3(3, 0) 391*9f69f52bSEric Biggers LOAD_W_VEC_3(4, 0) 392*9f69f52bSEric Biggers LOAD_W_VEC_3(5, 0) 393*9f69f52bSEric Biggers LOAD_W_VEC_3(6, 0) 394*9f69f52bSEric Biggers LOAD_W_VEC_3(7, 0) 395*9f69f52bSEric Biggers LOAD_W_VEC_3(8, 0) 396*9f69f52bSEric Biggers 397*9f69f52bSEric Biggers.balign 16 398*9f69f52bSEric Biggers.Loop: 399*9f69f52bSEric Biggers /* Transform 0-3 */ 400*9f69f52bSEric Biggers R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0) 401*9f69f52bSEric Biggers R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0) 402*9f69f52bSEric Biggers R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0) 403*9f69f52bSEric Biggers R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0) 404*9f69f52bSEric Biggers 405*9f69f52bSEric Biggers /* Transform 4-7 + Precalc 12-14 */ 406*9f69f52bSEric Biggers R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0) 407*9f69f52bSEric Biggers R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0) 408*9f69f52bSEric Biggers R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12) 409*9f69f52bSEric Biggers R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12) 410*9f69f52bSEric Biggers 411*9f69f52bSEric Biggers /* Transform 8-11 + Precalc 12-17 */ 412*9f69f52bSEric Biggers R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12) 413*9f69f52bSEric Biggers R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15) 414*9f69f52bSEric Biggers R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15) 415*9f69f52bSEric Biggers R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15) 416*9f69f52bSEric Biggers 417*9f69f52bSEric Biggers /* Transform 12-14 + Precalc 18-20 */ 418*9f69f52bSEric Biggers R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18) 419*9f69f52bSEric Biggers R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18) 420*9f69f52bSEric Biggers R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18) 421*9f69f52bSEric Biggers 422*9f69f52bSEric Biggers /* Transform 15-17 + Precalc 21-23 */ 423*9f69f52bSEric Biggers R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21) 424*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21) 425*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21) 426*9f69f52bSEric Biggers 427*9f69f52bSEric Biggers /* Transform 18-20 + Precalc 24-26 */ 428*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24) 429*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24) 430*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24) 431*9f69f52bSEric Biggers 432*9f69f52bSEric Biggers /* Transform 21-23 + Precalc 27-29 */ 433*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27) 434*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27) 435*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27) 436*9f69f52bSEric Biggers 437*9f69f52bSEric Biggers /* Transform 24-26 + Precalc 30-32 */ 438*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30) 439*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30) 440*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30) 441*9f69f52bSEric Biggers 442*9f69f52bSEric Biggers /* Transform 27-29 + Precalc 33-35 */ 443*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33) 444*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33) 445*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33) 446*9f69f52bSEric Biggers 447*9f69f52bSEric Biggers /* Transform 30-32 + Precalc 36-38 */ 448*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36) 449*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36) 450*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36) 451*9f69f52bSEric Biggers 452*9f69f52bSEric Biggers /* Transform 33-35 + Precalc 39-41 */ 453*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39) 454*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39) 455*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39) 456*9f69f52bSEric Biggers 457*9f69f52bSEric Biggers /* Transform 36-38 + Precalc 42-44 */ 458*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42) 459*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42) 460*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42) 461*9f69f52bSEric Biggers 462*9f69f52bSEric Biggers /* Transform 39-41 + Precalc 45-47 */ 463*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45) 464*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45) 465*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45) 466*9f69f52bSEric Biggers 467*9f69f52bSEric Biggers /* Transform 42-44 + Precalc 48-50 */ 468*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48) 469*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48) 470*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48) 471*9f69f52bSEric Biggers 472*9f69f52bSEric Biggers /* Transform 45-47 + Precalc 51-53 */ 473*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51) 474*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51) 475*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51) 476*9f69f52bSEric Biggers 477*9f69f52bSEric Biggers /* Transform 48-50 + Precalc 54-56 */ 478*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54) 479*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54) 480*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54) 481*9f69f52bSEric Biggers 482*9f69f52bSEric Biggers /* Transform 51-53 + Precalc 57-59 */ 483*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57) 484*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57) 485*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57) 486*9f69f52bSEric Biggers 487*9f69f52bSEric Biggers /* Transform 54-56 + Precalc 60-62 */ 488*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60) 489*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60) 490*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60) 491*9f69f52bSEric Biggers 492*9f69f52bSEric Biggers /* Transform 57-59 + Precalc 63 */ 493*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63) 494*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63) 495*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63) 496*9f69f52bSEric Biggers 497*9f69f52bSEric Biggers /* Transform 60 */ 498*9f69f52bSEric Biggers R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _) 499*9f69f52bSEric Biggers subs RNBLKS, RNBLKS, #1 500*9f69f52bSEric Biggers b.eq .Lend 501*9f69f52bSEric Biggers 502*9f69f52bSEric Biggers /* Transform 61-63 + Preload next block */ 503*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _) 504*9f69f52bSEric Biggers ldp s0, s1, [RSTATE, #0] 505*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _) 506*9f69f52bSEric Biggers ldp s2, s3, [RSTATE, #8] 507*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _) 508*9f69f52bSEric Biggers 509*9f69f52bSEric Biggers /* Update the chaining variables. */ 510*9f69f52bSEric Biggers eor ra, ra, s0 511*9f69f52bSEric Biggers eor rb, rb, s1 512*9f69f52bSEric Biggers ldp s0, s1, [RSTATE, #16] 513*9f69f52bSEric Biggers eor rc, rc, s2 514*9f69f52bSEric Biggers ldp k_even, k_odd, [RSTATE, #24] 515*9f69f52bSEric Biggers eor rd, rd, s3 516*9f69f52bSEric Biggers eor re, re, s0 517*9f69f52bSEric Biggers stp ra, rb, [RSTATE, #0] 518*9f69f52bSEric Biggers eor rf, rf, s1 519*9f69f52bSEric Biggers stp rc, rd, [RSTATE, #8] 520*9f69f52bSEric Biggers eor rg, rg, k_even 521*9f69f52bSEric Biggers stp re, rf, [RSTATE, #16] 522*9f69f52bSEric Biggers eor rh, rh, k_odd 523*9f69f52bSEric Biggers stp rg, rh, [RSTATE, #24] 524*9f69f52bSEric Biggers b .Loop 525*9f69f52bSEric Biggers 526*9f69f52bSEric Biggers.Lend: 527*9f69f52bSEric Biggers /* Transform 61-63 */ 528*9f69f52bSEric Biggers R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _) 529*9f69f52bSEric Biggers ldp s0, s1, [RSTATE, #0] 530*9f69f52bSEric Biggers R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _) 531*9f69f52bSEric Biggers ldp s2, s3, [RSTATE, #8] 532*9f69f52bSEric Biggers R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _) 533*9f69f52bSEric Biggers 534*9f69f52bSEric Biggers /* Update the chaining variables. */ 535*9f69f52bSEric Biggers eor ra, ra, s0 536*9f69f52bSEric Biggers clear_vec(W0) 537*9f69f52bSEric Biggers eor rb, rb, s1 538*9f69f52bSEric Biggers clear_vec(W1) 539*9f69f52bSEric Biggers ldp s0, s1, [RSTATE, #16] 540*9f69f52bSEric Biggers clear_vec(W2) 541*9f69f52bSEric Biggers eor rc, rc, s2 542*9f69f52bSEric Biggers clear_vec(W3) 543*9f69f52bSEric Biggers ldp k_even, k_odd, [RSTATE, #24] 544*9f69f52bSEric Biggers clear_vec(W4) 545*9f69f52bSEric Biggers eor rd, rd, s3 546*9f69f52bSEric Biggers clear_vec(W5) 547*9f69f52bSEric Biggers eor re, re, s0 548*9f69f52bSEric Biggers clear_vec(XTMP0) 549*9f69f52bSEric Biggers stp ra, rb, [RSTATE, #0] 550*9f69f52bSEric Biggers clear_vec(XTMP1) 551*9f69f52bSEric Biggers eor rf, rf, s1 552*9f69f52bSEric Biggers clear_vec(XTMP2) 553*9f69f52bSEric Biggers stp rc, rd, [RSTATE, #8] 554*9f69f52bSEric Biggers clear_vec(XTMP3) 555*9f69f52bSEric Biggers eor rg, rg, k_even 556*9f69f52bSEric Biggers clear_vec(XTMP4) 557*9f69f52bSEric Biggers stp re, rf, [RSTATE, #16] 558*9f69f52bSEric Biggers clear_vec(XTMP5) 559*9f69f52bSEric Biggers eor rh, rh, k_odd 560*9f69f52bSEric Biggers clear_vec(XTMP6) 561*9f69f52bSEric Biggers stp rg, rh, [RSTATE, #24] 562*9f69f52bSEric Biggers 563*9f69f52bSEric Biggers /* Clear message expansion area */ 564*9f69f52bSEric Biggers add addr0, sp, #STACK_W 565*9f69f52bSEric Biggers st1 {W0.16b-W3.16b}, [addr0], #64 566*9f69f52bSEric Biggers st1 {W0.16b-W3.16b}, [addr0], #64 567*9f69f52bSEric Biggers st1 {W0.16b-W3.16b}, [addr0] 568*9f69f52bSEric Biggers 569*9f69f52bSEric Biggers mov sp, RFRAME 570*9f69f52bSEric Biggers 571*9f69f52bSEric Biggers ldp x25, x26, [sp], #16 572*9f69f52bSEric Biggers ldp x23, x24, [sp], #16 573*9f69f52bSEric Biggers ldp x21, x22, [sp], #16 574*9f69f52bSEric Biggers ldp x19, x20, [sp], #16 575*9f69f52bSEric Biggers ldp x28, x29, [sp], #16 576*9f69f52bSEric Biggers 577*9f69f52bSEric Biggers ret 578*9f69f52bSEric BiggersSYM_FUNC_END(sm3_neon_transform) 579*9f69f52bSEric Biggers 580*9f69f52bSEric Biggers 581*9f69f52bSEric Biggers .section ".rodata", "a" 582*9f69f52bSEric Biggers 583*9f69f52bSEric Biggers .align 4 584*9f69f52bSEric Biggers.LKtable: 585*9f69f52bSEric Biggers .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb 586*9f69f52bSEric Biggers .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc 587*9f69f52bSEric Biggers .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce 588*9f69f52bSEric Biggers .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6 589*9f69f52bSEric Biggers .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c 590*9f69f52bSEric Biggers .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce 591*9f69f52bSEric Biggers .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec 592*9f69f52bSEric Biggers .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 593*9f69f52bSEric Biggers .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53 594*9f69f52bSEric Biggers .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d 595*9f69f52bSEric Biggers .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4 596*9f69f52bSEric Biggers .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43 597*9f69f52bSEric Biggers .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c 598*9f69f52bSEric Biggers .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce 599*9f69f52bSEric Biggers .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec 600*9f69f52bSEric Biggers .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 601